In [23]:
#Importing all the libraries
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score
In [24]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
In [25]:
df = pd.read_csv('./disease-burden-by-risk-factor.csv')

# Define new column names
new_columns = {
    'DALYs that are from all causes attributed to low physical activity, in both sexes aged all ages': 'low_physical_activity',
    'DALYs that are from all causes attributed to non-exclusive breastfeeding, in both sexes aged all ages': 'non_exclusive_breastfeeding',
    'DALYs that are from all causes attributed to air pollution, in both sexes aged all ages': 'air_pollution',
    'DALYs that are from all causes attributed to child wasting, in both sexes aged all ages': 'child_wasting',
    'DALYs that are from all causes attributed to high systolic blood pressure, in both sexes aged all ages': 'high_systolic_blood_pressure',
    'DALYs that are from all causes attributed to high fasting plasma glucose, in both sexes aged all ages': 'high_fasting_plasma_glucose',
    'DALYs that are from all causes attributed to child stunting, in both sexes aged all ages': 'child_stunting',
    'DALYs that are from all causes attributed to high body-mass index, in both sexes aged all ages': 'high_body_mass_index',
    'DALYs that are from all causes attributed to secondhand smoke, in both sexes aged all ages': 'secondhand_smoke',
    'DALYs that are from all causes attributed to unsafe sanitation, in both sexes aged all ages': 'unsafe_sanitation',
    'DALYs that are from all causes attributed to unsafe water source, in both sexes aged all ages': 'unsafe_water_source',
    'DALYs that are from all causes attributed to diet low in vegetables, in both sexes aged all ages': 'diet_low_in_vegetables',
    'DALYs that are from all causes attributed to diet low in fruits, in both sexes aged all ages': 'diet_low_in_fruits',
    'DALYs that are from all causes attributed to diet high in sodium, in both sexes aged all ages': 'diet_high_in_sodium',
    'DALYs that are from all causes attributed to drug use, in both sexes aged all ages': 'drug_use',
    'DALYs that are from all causes attributed to household air pollution from solid fuels, in both sexes aged all ages': 'household_air_pollution',
    'DALYs that are from all causes attributed to high ldl cholesterol, in both sexes aged all ages': 'high_ldl_cholesterol',
    'DALYs that are from all causes attributed to iron deficiency, in both sexes aged all ages': 'iron_deficiency',
    'DALYs that are from all causes attributed to zinc deficiency, in both sexes aged all ages': 'zinc_deficiency',
    'DALYs that are from all causes attributed to smoking, in both sexes aged all ages': 'smoking',
    'DALYs that are from all causes attributed to vitamin a deficiency, in both sexes aged all ages': 'vitamin_a_deficiency',
    'DALYs that are from all causes attributed to ambient particulate matter pollution, in both sexes aged all ages': 'ambient_particulate_matter_pollution'
}

# Rename the columns
df.rename(columns=new_columns, inplace=True)

# Print the columns to verify
print(df.columns)
Index(['Entity', 'Code', 'Year', 'low_physical_activity',
       'non_exclusive_breastfeeding', 'air_pollution', 'child_wasting',
       'high_systolic_blood_pressure', 'high_fasting_plasma_glucose',
       'child_stunting', 'high_body_mass_index', 'secondhand_smoke',
       'unsafe_sanitation', 'unsafe_water_source', 'diet_low_in_vegetables',
       'diet_low_in_fruits', 'diet_high_in_sodium', 'drug_use',
       'household_air_pollution', 'high_ldl_cholesterol', 'iron_deficiency',
       'zinc_deficiency', 'smoking', 'vitamin_a_deficiency',
       'ambient_particulate_matter_pollution'],
      dtype='object')
In [26]:
df1 = pd.read_csv('./share-of-total-disease-burden-by-cause.csv')

# Define new column names
new_columns_df1 = {
    'DALYs (Disability-Adjusted Life Years) - Self-harm - Sex: Both - Age: All Ages (Percent)': 'self_harm',
    'DALYs (Disability-Adjusted Life Years) - Exposure to forces of nature - Sex: Both - Age: All Ages (Percent)': 'exposure_to_forces_of_nature',
    'DALYs (Disability-Adjusted Life Years) - Conflict and terrorism - Sex: Both - Age: All Ages (Percent)': 'conflict_and_terrorism',
    'DALYs (Disability-Adjusted Life Years) - Interpersonal violence - Sex: Both - Age: All Ages (Percent)': 'interpersonal_violence',
    'DALYs (Disability-Adjusted Life Years) - Neglected tropical diseases and malaria - Sex: Both - Age: All Ages (Percent)': 'neglected_tropical_diseases_and_malaria',
    'DALYs (Disability-Adjusted Life Years) - Substance use disorders - Sex: Both - Age: All Ages (Percent)': 'substance_use_disorders',
    'DALYs (Disability-Adjusted Life Years) - Skin and subcutaneous diseases - Sex: Both - Age: All Ages (Percent)': 'skin_and_subcutaneous_diseases',
    'DALYs (Disability-Adjusted Life Years) - Enteric infections - Sex: Both - Age: All Ages (Percent)': 'enteric_infections',
    'DALYs (Disability-Adjusted Life Years) - Diabetes and kidney diseases - Sex: Both - Age: All Ages (Percent)': 'diabetes_and_kidney_diseases',
    'DALYs (Disability-Adjusted Life Years) - Cardiovascular diseases - Sex: Both - Age: All Ages (Percent)': 'cardiovascular_diseases',
    'DALYs (Disability-Adjusted Life Years) - Digestive diseases - Sex: Both - Age: All Ages (Percent)': 'digestive_diseases',
    'DALYs (Disability-Adjusted Life Years) - Nutritional deficiencies - Sex: Both - Age: All Ages (Percent)': 'nutritional_deficiencies',
    'DALYs (Disability-Adjusted Life Years) - Respiratory infections and tuberculosis - Sex: Both - Age: All Ages (Percent)': 'respiratory_infections_and_tuberculosis',
    'DALYs (Disability-Adjusted Life Years) - Neonatal disorders - Sex: Both - Age: All Ages (Percent)': 'neonatal_disorders',
    'DALYs (Disability-Adjusted Life Years) - Chronic respiratory diseases - Sex: Both - Age: All Ages (Percent)': 'chronic_respiratory_diseases',
    'DALYs (Disability-Adjusted Life Years) - Other non-communicable diseases - Sex: Both - Age: All Ages (Percent)': 'other_non_communicable_diseases',
    'DALYs (Disability-Adjusted Life Years) - Maternal disorders - Sex: Both - Age: All Ages (Percent)': 'maternal_disorders',
    'DALYs (Disability-Adjusted Life Years) - Unintentional injuries - Sex: Both - Age: All Ages (Percent)': 'unintentional_injuries',
    'DALYs (Disability-Adjusted Life Years) - Musculoskeletal disorders - Sex: Both - Age: All Ages (Percent)': 'musculoskeletal_disorders',
    'DALYs (Disability-Adjusted Life Years) - Neoplasms - Sex: Both - Age: All Ages (Percent)': 'neoplasms',
    'DALYs (Disability-Adjusted Life Years) - Mental disorders - Sex: Both - Age: All Ages (Percent)': 'mental_disorders',
    'DALYs (Disability-Adjusted Life Years) - Neurological disorders - Sex: Both - Age: All Ages (Percent)': 'neurological_disorders',
    'DALYs (Disability-Adjusted Life Years) - HIV/AIDS and sexually transmitted infections - Sex: Both - Age: All Ages (Percent)': 'HIV_and_sexually_transmitted_infections',
    'DALYs (Disability-Adjusted Life Years) - Transport injuries - Sex: Both - Age: All Ages (Percent)': 'transport_injuries',
    'DALYs (Disability-Adjusted Life Years) - Sense organ diseases - Sex: Both - Age: All Ages (Percent)': 'sense_organ_diseases'
}

# Rename the columns
df1.rename(columns=new_columns_df1, inplace=True)

# Print the columns to verify
print(df1.columns)

# Print the DataFrame
df1
Index(['Entity', 'Code', 'Year', 'self_harm', 'exposure_to_forces_of_nature',
       'conflict_and_terrorism', 'interpersonal_violence',
       'neglected_tropical_diseases_and_malaria', 'substance_use_disorders',
       'skin_and_subcutaneous_diseases', 'enteric_infections',
       'diabetes_and_kidney_diseases', 'cardiovascular_diseases',
       'digestive_diseases', 'nutritional_deficiencies',
       'respiratory_infections_and_tuberculosis', 'neonatal_disorders',
       'chronic_respiratory_diseases', 'other_non_communicable_diseases',
       'maternal_disorders', 'unintentional_injuries',
       'musculoskeletal_disorders', 'neoplasms', 'mental_disorders',
       'neurological_disorders', 'HIV_and_sexually_transmitted_infections',
       'transport_injuries', 'sense_organ_diseases'],
      dtype='object')
Out[26]:
Entity Code Year self_harm exposure_to_forces_of_nature conflict_and_terrorism interpersonal_violence neglected_tropical_diseases_and_malaria substance_use_disorders skin_and_subcutaneous_diseases ... other_non_communicable_diseases maternal_disorders unintentional_injuries musculoskeletal_disorders neoplasms mental_disorders neurological_disorders HIV_and_sexually_transmitted_infections transport_injuries sense_organ_diseases
0 Afghanistan AFG 1990 0.31 0.01 3.69 0.83 1.12 0.19 0.42 ... 11.94 1.35 3.81 0.94 3.27 1.70 1.28 0.12 2.38 0.65
1 Afghanistan AFG 1991 0.31 0.66 4.60 1.01 1.14 0.19 0.42 ... 11.87 1.38 4.35 0.92 3.13 1.73 1.27 0.12 2.35 0.63
2 Afghanistan AFG 1992 0.33 0.29 4.94 1.06 1.10 0.20 0.44 ... 12.27 1.44 3.94 0.90 2.98 1.79 1.27 0.15 2.42 0.60
3 Afghanistan AFG 1993 0.33 0.11 4.55 1.09 0.96 0.21 0.43 ... 12.50 1.45 3.80 0.87 2.84 1.78 1.24 0.17 2.48 0.57
4 Afghanistan AFG 1994 0.32 0.08 6.23 1.10 0.92 0.20 0.42 ... 12.18 1.41 3.71 0.82 2.70 1.71 1.20 0.17 2.41 0.54
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
6835 Zimbabwe ZWE 2015 1.36 0.01 0.02 1.09 3.23 0.58 0.85 ... 4.04 1.10 2.85 1.64 4.43 2.19 1.75 22.39 1.97 0.98
6836 Zimbabwe ZWE 2016 1.43 0.03 0.01 1.14 2.78 0.60 0.88 ... 4.18 1.10 3.02 1.72 4.65 2.28 1.82 21.17 2.07 1.02
6837 Zimbabwe ZWE 2017 1.48 0.22 0.01 1.17 2.85 0.63 0.91 ... 4.29 1.10 3.31 1.80 4.84 2.36 1.88 19.77 2.16 1.05
6838 Zimbabwe ZWE 2018 1.54 0.00 0.01 1.23 2.86 0.66 0.94 ... 4.44 1.11 3.19 1.90 5.08 2.47 1.97 18.05 2.26 1.10
6839 Zimbabwe ZWE 2019 1.57 0.58 0.01 1.26 2.82 0.69 0.96 ... 4.51 1.11 3.86 1.97 5.23 2.53 2.01 16.96 2.23 1.13

6840 rows × 28 columns

In [27]:
import seaborn as sns
corr_matrix = df1.drop(['Year','Entity'],axis =1).corr()

plt.figure(figsize=(18, 8))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', linewidths=.5)
plt.title('Correlation Matrix of Disease Burden by Sub-Category')
plt.show()
/var/folders/tc/t9c_fvpn08jbwz38bdj660vm0000gn/T/ipykernel_16363/1473648444.py:2: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning.
  corr_matrix = df1.drop(['Year','Entity'],axis =1).corr()
In [28]:
target_column = 'mental_disorders'
related_correlations = corr_matrix[target_column]

# Create a list of columns with correlation >= 0.5
high_correlation_columns = related_correlations[related_correlations >= 0.5].index.tolist()

# Remove the target column itself from the list
high_correlation_columns.remove(target_column)

# Print the result
print("Columns with correlation >= 0.5 to '{}':".format(target_column))
print(high_correlation_columns)
Columns with correlation >= 0.5 to 'mental_disorders':
['substance_use_disorders', 'skin_and_subcutaneous_diseases', 'musculoskeletal_disorders', 'neoplasms', 'neurological_disorders', 'sense_organ_diseases']
In [29]:
def convert_to_actual_dalys(percent_str, total):
    # Convert percent_str to float
    percent = float(percent_str)
    # Perform the calculation
    return (percent / 100) * total
In [30]:
total_population = 100000
In [31]:
for col in df1.columns:
    # Exclude 'Entity', 'Code', and 'Year' columns
    if col not in ['Entity', 'Code', 'Year']:
        actual_col_name = col
        df1[actual_col_name] = df1[col].apply(convert_to_actual_dalys, total=total_population)
df1
Out[31]:
Entity Code Year self_harm exposure_to_forces_of_nature conflict_and_terrorism interpersonal_violence neglected_tropical_diseases_and_malaria substance_use_disorders skin_and_subcutaneous_diseases ... other_non_communicable_diseases maternal_disorders unintentional_injuries musculoskeletal_disorders neoplasms mental_disorders neurological_disorders HIV_and_sexually_transmitted_infections transport_injuries sense_organ_diseases
0 Afghanistan AFG 1990 310.0 10.0 3690.0 830.0 1120.0 190.0 420.0 ... 11940.0 1350.0 3810.0 940.0 3270.0 1700.0 1280.0 120.0 2380.0 650.0
1 Afghanistan AFG 1991 310.0 660.0 4600.0 1010.0 1140.0 190.0 420.0 ... 11870.0 1380.0 4350.0 920.0 3130.0 1730.0 1270.0 120.0 2350.0 630.0
2 Afghanistan AFG 1992 330.0 290.0 4940.0 1060.0 1100.0 200.0 440.0 ... 12270.0 1440.0 3940.0 900.0 2980.0 1790.0 1270.0 150.0 2420.0 600.0
3 Afghanistan AFG 1993 330.0 110.0 4550.0 1090.0 960.0 210.0 430.0 ... 12500.0 1450.0 3800.0 870.0 2840.0 1780.0 1240.0 170.0 2480.0 570.0
4 Afghanistan AFG 1994 320.0 80.0 6230.0 1100.0 920.0 200.0 420.0 ... 12180.0 1410.0 3710.0 820.0 2700.0 1710.0 1200.0 170.0 2410.0 540.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
6835 Zimbabwe ZWE 2015 1360.0 10.0 20.0 1090.0 3230.0 580.0 850.0 ... 4040.0 1100.0 2850.0 1640.0 4430.0 2190.0 1750.0 22390.0 1970.0 980.0
6836 Zimbabwe ZWE 2016 1430.0 30.0 10.0 1140.0 2780.0 600.0 880.0 ... 4180.0 1100.0 3020.0 1720.0 4650.0 2280.0 1820.0 21170.0 2070.0 1020.0
6837 Zimbabwe ZWE 2017 1480.0 220.0 10.0 1170.0 2850.0 630.0 910.0 ... 4290.0 1100.0 3310.0 1800.0 4840.0 2360.0 1880.0 19770.0 2160.0 1050.0
6838 Zimbabwe ZWE 2018 1540.0 0.0 10.0 1230.0 2860.0 660.0 940.0 ... 4440.0 1110.0 3190.0 1900.0 5080.0 2470.0 1970.0 18050.0 2260.0 1100.0
6839 Zimbabwe ZWE 2019 1570.0 580.0 10.0 1260.0 2820.0 690.0 960.0 ... 4510.0 1110.0 3860.0 1970.0 5230.0 2530.0 2010.0 16960.0 2230.0 1130.0

6840 rows × 28 columns

In [32]:
df1
Out[32]:
Entity Code Year self_harm exposure_to_forces_of_nature conflict_and_terrorism interpersonal_violence neglected_tropical_diseases_and_malaria substance_use_disorders skin_and_subcutaneous_diseases ... other_non_communicable_diseases maternal_disorders unintentional_injuries musculoskeletal_disorders neoplasms mental_disorders neurological_disorders HIV_and_sexually_transmitted_infections transport_injuries sense_organ_diseases
0 Afghanistan AFG 1990 310.0 10.0 3690.0 830.0 1120.0 190.0 420.0 ... 11940.0 1350.0 3810.0 940.0 3270.0 1700.0 1280.0 120.0 2380.0 650.0
1 Afghanistan AFG 1991 310.0 660.0 4600.0 1010.0 1140.0 190.0 420.0 ... 11870.0 1380.0 4350.0 920.0 3130.0 1730.0 1270.0 120.0 2350.0 630.0
2 Afghanistan AFG 1992 330.0 290.0 4940.0 1060.0 1100.0 200.0 440.0 ... 12270.0 1440.0 3940.0 900.0 2980.0 1790.0 1270.0 150.0 2420.0 600.0
3 Afghanistan AFG 1993 330.0 110.0 4550.0 1090.0 960.0 210.0 430.0 ... 12500.0 1450.0 3800.0 870.0 2840.0 1780.0 1240.0 170.0 2480.0 570.0
4 Afghanistan AFG 1994 320.0 80.0 6230.0 1100.0 920.0 200.0 420.0 ... 12180.0 1410.0 3710.0 820.0 2700.0 1710.0 1200.0 170.0 2410.0 540.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
6835 Zimbabwe ZWE 2015 1360.0 10.0 20.0 1090.0 3230.0 580.0 850.0 ... 4040.0 1100.0 2850.0 1640.0 4430.0 2190.0 1750.0 22390.0 1970.0 980.0
6836 Zimbabwe ZWE 2016 1430.0 30.0 10.0 1140.0 2780.0 600.0 880.0 ... 4180.0 1100.0 3020.0 1720.0 4650.0 2280.0 1820.0 21170.0 2070.0 1020.0
6837 Zimbabwe ZWE 2017 1480.0 220.0 10.0 1170.0 2850.0 630.0 910.0 ... 4290.0 1100.0 3310.0 1800.0 4840.0 2360.0 1880.0 19770.0 2160.0 1050.0
6838 Zimbabwe ZWE 2018 1540.0 0.0 10.0 1230.0 2860.0 660.0 940.0 ... 4440.0 1110.0 3190.0 1900.0 5080.0 2470.0 1970.0 18050.0 2260.0 1100.0
6839 Zimbabwe ZWE 2019 1570.0 580.0 10.0 1260.0 2820.0 690.0 960.0 ... 4510.0 1110.0 3860.0 1970.0 5230.0 2530.0 2010.0 16960.0 2230.0 1130.0

6840 rows × 28 columns

In [33]:
merged_df = pd.merge(df, df1[['Entity','Year','substance_use_disorders', 'skin_and_subcutaneous_diseases', 'musculoskeletal_disorders', 'neoplasms', 'neurological_disorders', 'sense_organ_diseases','mental_disorders']], on=['Year', 'Entity'])

# Display the merged DataFrame
merged_df
print(merged_df.columns)
Index(['Entity', 'Code', 'Year', 'low_physical_activity',
       'non_exclusive_breastfeeding', 'air_pollution', 'child_wasting',
       'high_systolic_blood_pressure', 'high_fasting_plasma_glucose',
       'child_stunting', 'high_body_mass_index', 'secondhand_smoke',
       'unsafe_sanitation', 'unsafe_water_source', 'diet_low_in_vegetables',
       'diet_low_in_fruits', 'diet_high_in_sodium', 'drug_use',
       'household_air_pollution', 'high_ldl_cholesterol', 'iron_deficiency',
       'zinc_deficiency', 'smoking', 'vitamin_a_deficiency',
       'ambient_particulate_matter_pollution', 'substance_use_disorders',
       'skin_and_subcutaneous_diseases', 'musculoskeletal_disorders',
       'neoplasms', 'neurological_disorders', 'sense_organ_diseases',
       'mental_disorders'],
      dtype='object')
In [34]:
merged_df
Out[34]:
Entity Code Year low_physical_activity non_exclusive_breastfeeding air_pollution child_wasting high_systolic_blood_pressure high_fasting_plasma_glucose child_stunting ... smoking vitamin_a_deficiency ambient_particulate_matter_pollution substance_use_disorders skin_and_subcutaneous_diseases musculoskeletal_disorders neoplasms neurological_disorders sense_organ_diseases mental_disorders
0 Afghanistan AFG 1990 61720.0600 197049.340 1986290.40 1708694.40 663575.50 310177.80 670056.500 ... 146352.77 184149.1000 143037.550 190.0 420.0 940.0 3270.0 1280.0 650.0 1700.0
1 Afghanistan AFG 1991 62191.6000 222485.780 2069430.00 1779057.60 670934.56 320839.94 687930.700 ... 148548.45 188899.7700 148112.800 190.0 420.0 920.0 3130.0 1270.0 630.0 1730.0
2 Afghanistan AFG 1992 63325.2340 271585.200 2298508.20 2005481.50 685869.90 335451.56 748159.940 ... 152365.47 194698.0500 163113.840 200.0 440.0 900.0 2980.0 1270.0 600.0 1790.0
3 Afghanistan AFG 1993 64873.6250 331279.280 2555748.20 2366581.00 705695.94 351943.25 862717.750 ... 157201.58 214913.9000 180597.550 210.0 430.0 870.0 2840.0 1240.0 570.0 1780.0
4 Afghanistan AFG 1994 66452.0700 340745.120 2707120.00 2559192.20 725500.00 367981.28 963305.500 ... 162079.20 246066.0500 190782.720 200.0 420.0 820.0 2700.0 1200.0 540.0 1710.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
6715 Zimbabwe ZWE 2015 7196.8833 65671.530 653410.94 494563.78 288229.12 261661.61 63780.020 ... 254297.17 19442.2290 132720.830 580.0 850.0 1640.0 4430.0 1750.0 980.0 2190.0
6716 Zimbabwe ZWE 2016 7424.6733 62675.110 641920.20 489271.53 293492.00 267324.88 58934.336 ... 257688.56 15954.1960 128623.750 600.0 880.0 1720.0 4650.0 1820.0 1020.0 2280.0
6717 Zimbabwe ZWE 2017 7655.2163 60760.780 625386.40 472972.25 297975.16 271637.40 55018.605 ... 260584.25 14143.2295 122945.070 630.0 910.0 1800.0 4840.0 1880.0 1050.0 2360.0
6718 Zimbabwe ZWE 2018 7862.5596 57715.004 609545.30 453153.88 303253.00 276985.75 53059.625 ... 263968.06 14170.8080 120112.450 660.0 940.0 1900.0 5080.0 1970.0 1100.0 2470.0
6719 Zimbabwe ZWE 2019 8126.4062 56559.246 600330.00 442257.94 309965.70 283899.03 51181.336 ... 268873.30 13749.4900 119444.164 690.0 960.0 1970.0 5230.0 2010.0 1130.0 2530.0

6720 rows × 32 columns

In [35]:
corr_matrix = merged_df.drop(['Year','Entity'],axis =1).corr()
target_column = 'mental_disorders'
related_correlations = corr_matrix[target_column]
/var/folders/tc/t9c_fvpn08jbwz38bdj660vm0000gn/T/ipykernel_16363/3563348648.py:1: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning.
  corr_matrix = merged_df.drop(['Year','Entity'],axis =1).corr()
In [36]:
plt.figure(figsize=(25, 8))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', linewidths=.5)
plt.title('Correlation Matrix of Disease Burden by Sub-Category')
plt.show()
In [37]:
missing_values = merged_df.isnull().sum()
print("Missing Values:\n", missing_values)
Missing Values:
 Entity                                    0
Code                                    570
Year                                      0
low_physical_activity                     0
non_exclusive_breastfeeding               0
air_pollution                             0
child_wasting                             0
high_systolic_blood_pressure              0
high_fasting_plasma_glucose               0
child_stunting                            0
high_body_mass_index                      0
secondhand_smoke                          0
unsafe_sanitation                         0
unsafe_water_source                       0
diet_low_in_vegetables                    0
diet_low_in_fruits                        0
diet_high_in_sodium                       0
drug_use                                  0
household_air_pollution                   0
high_ldl_cholesterol                      0
iron_deficiency                           0
zinc_deficiency                           0
smoking                                   0
vitamin_a_deficiency                      0
ambient_particulate_matter_pollution      0
substance_use_disorders                   0
skin_and_subcutaneous_diseases            0
musculoskeletal_disorders                 0
neoplasms                                 0
neurological_disorders                    0
sense_organ_diseases                      0
mental_disorders                          0
dtype: int64
In [38]:
print("\nData Types:\n", merged_df.dtypes)
Data Types:
 Entity                                   object
Code                                     object
Year                                      int64
low_physical_activity                   float64
non_exclusive_breastfeeding             float64
air_pollution                           float64
child_wasting                           float64
high_systolic_blood_pressure            float64
high_fasting_plasma_glucose             float64
child_stunting                          float64
high_body_mass_index                    float64
secondhand_smoke                        float64
unsafe_sanitation                       float64
unsafe_water_source                     float64
diet_low_in_vegetables                  float64
diet_low_in_fruits                      float64
diet_high_in_sodium                     float64
drug_use                                float64
household_air_pollution                 float64
high_ldl_cholesterol                    float64
iron_deficiency                         float64
zinc_deficiency                         float64
smoking                                 float64
vitamin_a_deficiency                    float64
ambient_particulate_matter_pollution    float64
substance_use_disorders                 float64
skin_and_subcutaneous_diseases          float64
musculoskeletal_disorders               float64
neoplasms                               float64
neurological_disorders                  float64
sense_organ_diseases                    float64
mental_disorders                        float64
dtype: object
In [39]:
print("\nSummary Statistics:\n", merged_df.describe())
Summary Statistics:
               Year  low_physical_activity  non_exclusive_breastfeeding  \
count  6720.000000           6.720000e+03                 6.720000e+03   
mean   2004.500000           2.616060e+05                 5.284880e+05   
std       8.656086           1.093153e+06                 2.553256e+06   
min    1990.000000           5.962806e+00                 2.864449e-01   
25%    1997.000000           1.955261e+03                 3.224378e+02   
50%    2004.500000           1.002872e+04                 4.687922e+03   
75%    2012.000000           4.800914e+04                 1.070237e+05   
max    2019.000000           1.574794e+07                 4.496013e+07   

       air_pollution  child_wasting  high_systolic_blood_pressure  \
count   6.720000e+03   6.720000e+03                  6.720000e+03   
mean    5.048454e+06   3.524774e+06                  4.188101e+06   
std     2.223505e+07   1.735942e+07                  1.794726e+07   
min     5.418394e+00   4.202705e+00                  5.689009e+01   
25%     2.355536e+04   2.768432e+03                  4.207587e+04   
50%     1.545772e+05   3.834691e+04                  1.867063e+05   
75%     1.013556e+06   6.910512e+05                  8.235250e+05   
max     2.807178e+08   2.951882e+08                  2.354246e+08   

       high_fasting_plasma_glucose  child_stunting  high_body_mass_index  \
count                 6.720000e+03    6.720000e+03          6.720000e+03   
mean                  2.606789e+06    8.029631e+05          2.378053e+06   
std                   1.120238e+07    4.142251e+06          1.010609e+07   
min                   6.466809e+01    3.410204e-02          5.684043e+01   
25%                   3.012893e+04    7.211207e+01          3.080627e+04   
50%                   1.247997e+05    3.192497e+03          1.163628e+05   
75%                   4.976155e+05    1.234826e+05          4.966658e+05   
max                   1.720687e+08    7.249934e+07          1.602654e+08   

       secondhand_smoke  ...       smoking  vitamin_a_deficiency  \
count      6.720000e+03  ...  6.720000e+03          6.720000e+03   
mean       8.362629e+05  ...  4.111163e+06          2.111740e+05   
std        3.701606e+06  ...  1.790273e+07          1.145686e+06   
min        1.433595e+01  ...  3.961353e+01          2.169985e-02   
25%        6.923609e+03  ...  2.649569e+04          3.938570e+01   
50%        3.332242e+04  ...  1.370987e+05          6.973932e+02   
75%        1.384867e+05  ...  5.876447e+05          2.734946e+04   
max        4.668201e+07  ...  1.997948e+08          2.012786e+07   

       ambient_particulate_matter_pollution  substance_use_disorders  \
count                          6.720000e+03              6720.000000   
mean                           1.969151e+06              1280.479167   
std                            8.765911e+06              1023.246212   
min                            5.147841e+00                40.000000   
25%                            1.332260e+04               470.000000   
50%                            6.885797e+04              1050.000000   
75%                            2.995293e+05              1732.500000   
max                            1.182154e+08              6660.000000   

       skin_and_subcutaneous_diseases  musculoskeletal_disorders  \
count                     6720.000000                6720.000000   
mean                      1623.492560                5009.494048   
std                        712.395234                3125.986089   
min                         80.000000                  90.000000   
25%                       1050.000000                2310.000000   
50%                       1610.000000                4775.000000   
75%                       2150.000000                7000.000000   
max                       3970.000000               14510.000000   

          neoplasms  neurological_disorders  sense_organ_diseases  \
count   6720.000000             6720.000000           6720.000000   
mean    9531.322917             3580.559524           1845.034226   
std     6177.814757             1723.992479            795.824869   
min      360.000000              120.000000             70.000000   
25%     4150.000000             2110.000000           1190.000000   
50%     8140.000000             3640.000000           1950.000000   
75%    14900.000000             4780.000000           2390.000000   
max    31660.000000             8690.000000           4310.000000   

       mental_disorders  
count       6720.000000  
mean        4828.456845  
std         2299.398664  
min          220.000000  
25%         3020.000000  
50%         4680.000000  
75%         6392.500000  
max        13760.000000  

[8 rows x 30 columns]
In [40]:
countries = merged_df['Entity'].unique()
countries.shape
Out[40]:
(224,)
In [41]:
countries
Out[41]:
array(['Afghanistan', 'African Region (WHO)', 'Albania', 'Algeria',
       'American Samoa', 'Andorra', 'Angola', 'Antigua and Barbuda',
       'Argentina', 'Armenia', 'Australia', 'Austria', 'Azerbaijan',
       'Bahamas', 'Bahrain', 'Bangladesh', 'Barbados', 'Belarus',
       'Belgium', 'Belize', 'Benin', 'Bermuda', 'Bhutan', 'Bolivia',
       'Bosnia and Herzegovina', 'Botswana', 'Brazil', 'Brunei',
       'Bulgaria', 'Burkina Faso', 'Burundi', 'Cambodia', 'Cameroon',
       'Canada', 'Cape Verde', 'Central African Republic', 'Chad',
       'Chile', 'China', 'Colombia', 'Comoros', 'Congo', 'Cook Islands',
       'Costa Rica', "Cote d'Ivoire", 'Croatia', 'Cuba', 'Cyprus',
       'Czechia', 'Democratic Republic of Congo', 'Denmark', 'Djibouti',
       'Dominica', 'Dominican Republic', 'East Asia & Pacific (WB)',
       'East Timor', 'Eastern Mediterranean Region (WHO)', 'Ecuador',
       'Egypt', 'El Salvador', 'England', 'Equatorial Guinea', 'Eritrea',
       'Estonia', 'Eswatini', 'Ethiopia', 'Europe & Central Asia (WB)',
       'European Region (WHO)', 'Fiji', 'Finland', 'France', 'G20',
       'Gabon', 'Gambia', 'Georgia', 'Germany', 'Ghana', 'Greece',
       'Greenland', 'Grenada', 'Guam', 'Guatemala', 'Guinea',
       'Guinea-Bissau', 'Guyana', 'Haiti', 'Honduras', 'Hungary',
       'Iceland', 'India', 'Indonesia', 'Iran', 'Iraq', 'Ireland',
       'Israel', 'Italy', 'Jamaica', 'Japan', 'Jordan', 'Kazakhstan',
       'Kenya', 'Kiribati', 'Kuwait', 'Kyrgyzstan', 'Laos',
       'Latin America & Caribbean (WB)', 'Latvia', 'Lebanon', 'Lesotho',
       'Liberia', 'Libya', 'Lithuania', 'Luxembourg', 'Madagascar',
       'Malawi', 'Malaysia', 'Maldives', 'Mali', 'Malta',
       'Marshall Islands', 'Mauritania', 'Mauritius', 'Mexico',
       'Micronesia (country)', 'Middle East & North Africa (WB)',
       'Moldova', 'Monaco', 'Mongolia', 'Montenegro', 'Morocco',
       'Mozambique', 'Myanmar', 'Namibia', 'Nauru', 'Nepal',
       'Netherlands', 'New Zealand', 'Nicaragua', 'Niger', 'Nigeria',
       'Niue', 'North America (WB)', 'North Korea', 'North Macedonia',
       'Northern Ireland', 'Northern Mariana Islands', 'Norway',
       'OECD Countries', 'Oman', 'Pakistan', 'Palau', 'Palestine',
       'Panama', 'Papua New Guinea', 'Paraguay', 'Peru', 'Philippines',
       'Poland', 'Portugal', 'Puerto Rico', 'Qatar',
       'Region of the Americas (WHO)', 'Romania', 'Russia', 'Rwanda',
       'Saint Kitts and Nevis', 'Saint Lucia',
       'Saint Vincent and the Grenadines', 'Samoa', 'San Marino',
       'Sao Tome and Principe', 'Saudi Arabia', 'Scotland', 'Senegal',
       'Serbia', 'Seychelles', 'Sierra Leone', 'Singapore', 'Slovakia',
       'Slovenia', 'Solomon Islands', 'Somalia', 'South Africa',
       'South Asia (WB)', 'South Korea', 'South Sudan',
       'South-East Asia Region (WHO)', 'Spain', 'Sri Lanka',
       'Sub-Saharan Africa (WB)', 'Sudan', 'Suriname', 'Sweden',
       'Switzerland', 'Syria', 'Taiwan', 'Tajikistan', 'Tanzania',
       'Thailand', 'Togo', 'Tokelau', 'Tonga', 'Trinidad and Tobago',
       'Tunisia', 'Turkey', 'Turkmenistan', 'Tuvalu', 'Uganda', 'Ukraine',
       'United Arab Emirates', 'United Kingdom', 'United States',
       'United States Virgin Islands', 'Uruguay', 'Uzbekistan', 'Vanuatu',
       'Venezuela', 'Vietnam', 'Wales', 'Western Pacific Region (WHO)',
       'World', 'Yemen', 'Zambia', 'Zimbabwe'], dtype=object)
In [42]:
import numpy as np

# List of 195 recognized countries
recognized_countries = [
    "Afghanistan", "Albania", "Algeria", "Andorra", "Angola", "Antigua and Barbuda", "Argentina", "Armenia",
    "Australia", "Austria", "Azerbaijan", "Bahamas", "Bahrain", "Bangladesh", "Barbados", "Belarus", "Belgium",
    "Belize", "Benin", "Bhutan", "Bolivia", "Bosnia and Herzegovina", "Botswana", "Brazil", "Brunei", "Bulgaria",
    "Burkina Faso", "Burundi", "Cabo Verde", "Cambodia", "Cameroon", "Canada", "Central African Republic", "Chad",
    "Chile", "China", "Colombia", "Comoros", "Congo, Democratic Republic of the", "Congo, Republic of the",
    "Costa Rica", "Croatia", "Cuba", "Cyprus", "Czech Republic", "Denmark", "Djibouti", "Dominica", "Dominican Republic",
    "East Timor (Timor-Leste)", "Ecuador", "Egypt", "El Salvador", "Equatorial Guinea", "Eritrea", "Estonia", "Eswatini",
    "Ethiopia", "Fiji", "Finland", "France", "Gabon", "Gambia", "Georgia", "Germany", "Ghana", "Greece", "Grenada",
    "Guatemala", "Guinea", "Guinea-Bissau", "Guyana", "Haiti", "Honduras", "Hungary", "Iceland", "India", "Indonesia",
    "Iran", "Iraq", "Ireland", "Israel", "Italy", "Jamaica", "Japan", "Jordan", "Kazakhstan", "Kenya", "Kiribati",
    "Korea, North", "Korea, South", "Kosovo", "Kuwait", "Kyrgyzstan", "Laos", "Latvia", "Lebanon", "Lesotho", "Liberia",
    "Libya", "Liechtenstein", "Lithuania", "Luxembourg", "Madagascar", "Malawi", "Malaysia", "Maldives", "Mali",
    "Malta", "Marshall Islands", "Mauritania", "Mauritius", "Mexico", "Micronesia", "Moldova", "Monaco", "Mongolia",
    "Montenegro", "Morocco", "Mozambique", "Myanmar", "Namibia", "Nauru", "Nepal", "Netherlands", "New Zealand",
    "Nicaragua", "Niger", "Nigeria", "North Macedonia", "Norway", "Oman", "Pakistan", "Palau", "Palestine", "Panama",
    "Papua New Guinea", "Paraguay", "Peru", "Philippines", "Poland", "Portugal", "Qatar", "Romania", "Russia", "Rwanda",
    "Saint Kitts and Nevis", "Saint Lucia", "Saint Vincent and the Grenadines", "Samoa", "San Marino", "Sao Tome and Principe",
    "Saudi Arabia", "Senegal", "Serbia", "Seychelles", "Sierra Leone", "Singapore", "Slovakia", "Slovenia", "Solomon Islands",
    "Somalia", "South Africa", "South Sudan", "Spain", "Sri Lanka", "Sudan", "Suriname", "Sweden", "Switzerland", "Syria",
    "Taiwan", "Tajikistan", "Tanzania", "Thailand", "Togo", "Tonga", "Trinidad and Tobago", "Tunisia", "Turkey", "Turkmenistan",
    "Tuvalu", "Uganda", "Ukraine", "United Arab Emirates", "United Kingdom", "United States", "Uruguay", "Uzbekistan", "Vanuatu",
    "Vatican City (Holy See)", "Venezuela", "Vietnam", "Yemen", "Zambia", "Zimbabwe"
]


# Filter out non-country entities
filtered_entities = [country for country in countries if country in recognized_countries]

# Print the filtered list
print(filtered_entities)
['Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola', 'Antigua and Barbuda', 'Argentina', 'Armenia', 'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh', 'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin', 'Bhutan', 'Bolivia', 'Bosnia and Herzegovina', 'Botswana', 'Brazil', 'Brunei', 'Bulgaria', 'Burkina Faso', 'Burundi', 'Cambodia', 'Cameroon', 'Canada', 'Central African Republic', 'Chad', 'Chile', 'China', 'Colombia', 'Comoros', 'Costa Rica', 'Croatia', 'Cuba', 'Cyprus', 'Denmark', 'Djibouti', 'Dominica', 'Dominican Republic', 'Ecuador', 'Egypt', 'El Salvador', 'Equatorial Guinea', 'Eritrea', 'Estonia', 'Eswatini', 'Ethiopia', 'Fiji', 'Finland', 'France', 'Gabon', 'Gambia', 'Georgia', 'Germany', 'Ghana', 'Greece', 'Grenada', 'Guatemala', 'Guinea', 'Guinea-Bissau', 'Guyana', 'Haiti', 'Honduras', 'Hungary', 'Iceland', 'India', 'Indonesia', 'Iran', 'Iraq', 'Ireland', 'Israel', 'Italy', 'Jamaica', 'Japan', 'Jordan', 'Kazakhstan', 'Kenya', 'Kiribati', 'Kuwait', 'Kyrgyzstan', 'Laos', 'Latvia', 'Lebanon', 'Lesotho', 'Liberia', 'Libya', 'Lithuania', 'Luxembourg', 'Madagascar', 'Malawi', 'Malaysia', 'Maldives', 'Mali', 'Malta', 'Marshall Islands', 'Mauritania', 'Mauritius', 'Mexico', 'Moldova', 'Monaco', 'Mongolia', 'Montenegro', 'Morocco', 'Mozambique', 'Myanmar', 'Namibia', 'Nauru', 'Nepal', 'Netherlands', 'New Zealand', 'Nicaragua', 'Niger', 'Nigeria', 'North Macedonia', 'Norway', 'Oman', 'Pakistan', 'Palau', 'Palestine', 'Panama', 'Papua New Guinea', 'Paraguay', 'Peru', 'Philippines', 'Poland', 'Portugal', 'Qatar', 'Romania', 'Russia', 'Rwanda', 'Saint Kitts and Nevis', 'Saint Lucia', 'Saint Vincent and the Grenadines', 'Samoa', 'San Marino', 'Sao Tome and Principe', 'Saudi Arabia', 'Senegal', 'Serbia', 'Seychelles', 'Sierra Leone', 'Singapore', 'Slovakia', 'Slovenia', 'Solomon Islands', 'Somalia', 'South Africa', 'South Sudan', 'Spain', 'Sri Lanka', 'Sudan', 'Suriname', 'Sweden', 'Switzerland', 'Syria', 'Taiwan', 'Tajikistan', 'Tanzania', 'Thailand', 'Togo', 'Tonga', 'Trinidad and Tobago', 'Tunisia', 'Turkey', 'Turkmenistan', 'Tuvalu', 'Uganda', 'Ukraine', 'United Arab Emirates', 'United Kingdom', 'United States', 'Uruguay', 'Uzbekistan', 'Vanuatu', 'Venezuela', 'Vietnam', 'Yemen', 'Zambia', 'Zimbabwe']
In [43]:
len(filtered_entities)
Out[43]:
185
In [44]:
recognized_countries_set = set(filtered_entities)

# Filter the dataframe to include only rows where the 'entity' column is in the recognized countries set
merged_df = merged_df[merged_df['Entity'].isin(recognized_countries_set)]

# Display the filtered dataframe
merged_df
Out[44]:
Entity Code Year low_physical_activity non_exclusive_breastfeeding air_pollution child_wasting high_systolic_blood_pressure high_fasting_plasma_glucose child_stunting ... smoking vitamin_a_deficiency ambient_particulate_matter_pollution substance_use_disorders skin_and_subcutaneous_diseases musculoskeletal_disorders neoplasms neurological_disorders sense_organ_diseases mental_disorders
0 Afghanistan AFG 1990 61720.0600 197049.340 1986290.40 1708694.40 663575.50 310177.80 670056.500 ... 146352.77 184149.1000 143037.550 190.0 420.0 940.0 3270.0 1280.0 650.0 1700.0
1 Afghanistan AFG 1991 62191.6000 222485.780 2069430.00 1779057.60 670934.56 320839.94 687930.700 ... 148548.45 188899.7700 148112.800 190.0 420.0 920.0 3130.0 1270.0 630.0 1730.0
2 Afghanistan AFG 1992 63325.2340 271585.200 2298508.20 2005481.50 685869.90 335451.56 748159.940 ... 152365.47 194698.0500 163113.840 200.0 440.0 900.0 2980.0 1270.0 600.0 1790.0
3 Afghanistan AFG 1993 64873.6250 331279.280 2555748.20 2366581.00 705695.94 351943.25 862717.750 ... 157201.58 214913.9000 180597.550 210.0 430.0 870.0 2840.0 1240.0 570.0 1780.0
4 Afghanistan AFG 1994 66452.0700 340745.120 2707120.00 2559192.20 725500.00 367981.28 963305.500 ... 162079.20 246066.0500 190782.720 200.0 420.0 820.0 2700.0 1200.0 540.0 1710.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
6715 Zimbabwe ZWE 2015 7196.8833 65671.530 653410.94 494563.78 288229.12 261661.61 63780.020 ... 254297.17 19442.2290 132720.830 580.0 850.0 1640.0 4430.0 1750.0 980.0 2190.0
6716 Zimbabwe ZWE 2016 7424.6733 62675.110 641920.20 489271.53 293492.00 267324.88 58934.336 ... 257688.56 15954.1960 128623.750 600.0 880.0 1720.0 4650.0 1820.0 1020.0 2280.0
6717 Zimbabwe ZWE 2017 7655.2163 60760.780 625386.40 472972.25 297975.16 271637.40 55018.605 ... 260584.25 14143.2295 122945.070 630.0 910.0 1800.0 4840.0 1880.0 1050.0 2360.0
6718 Zimbabwe ZWE 2018 7862.5596 57715.004 609545.30 453153.88 303253.00 276985.75 53059.625 ... 263968.06 14170.8080 120112.450 660.0 940.0 1900.0 5080.0 1970.0 1100.0 2470.0
6719 Zimbabwe ZWE 2019 8126.4062 56559.246 600330.00 442257.94 309965.70 283899.03 51181.336 ... 268873.30 13749.4900 119444.164 690.0 960.0 1970.0 5230.0 2010.0 1130.0 2530.0

5550 rows × 32 columns

In [45]:
merged_df
Out[45]:
Entity Code Year low_physical_activity non_exclusive_breastfeeding air_pollution child_wasting high_systolic_blood_pressure high_fasting_plasma_glucose child_stunting ... smoking vitamin_a_deficiency ambient_particulate_matter_pollution substance_use_disorders skin_and_subcutaneous_diseases musculoskeletal_disorders neoplasms neurological_disorders sense_organ_diseases mental_disorders
0 Afghanistan AFG 1990 61720.0600 197049.340 1986290.40 1708694.40 663575.50 310177.80 670056.500 ... 146352.77 184149.1000 143037.550 190.0 420.0 940.0 3270.0 1280.0 650.0 1700.0
1 Afghanistan AFG 1991 62191.6000 222485.780 2069430.00 1779057.60 670934.56 320839.94 687930.700 ... 148548.45 188899.7700 148112.800 190.0 420.0 920.0 3130.0 1270.0 630.0 1730.0
2 Afghanistan AFG 1992 63325.2340 271585.200 2298508.20 2005481.50 685869.90 335451.56 748159.940 ... 152365.47 194698.0500 163113.840 200.0 440.0 900.0 2980.0 1270.0 600.0 1790.0
3 Afghanistan AFG 1993 64873.6250 331279.280 2555748.20 2366581.00 705695.94 351943.25 862717.750 ... 157201.58 214913.9000 180597.550 210.0 430.0 870.0 2840.0 1240.0 570.0 1780.0
4 Afghanistan AFG 1994 66452.0700 340745.120 2707120.00 2559192.20 725500.00 367981.28 963305.500 ... 162079.20 246066.0500 190782.720 200.0 420.0 820.0 2700.0 1200.0 540.0 1710.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
6715 Zimbabwe ZWE 2015 7196.8833 65671.530 653410.94 494563.78 288229.12 261661.61 63780.020 ... 254297.17 19442.2290 132720.830 580.0 850.0 1640.0 4430.0 1750.0 980.0 2190.0
6716 Zimbabwe ZWE 2016 7424.6733 62675.110 641920.20 489271.53 293492.00 267324.88 58934.336 ... 257688.56 15954.1960 128623.750 600.0 880.0 1720.0 4650.0 1820.0 1020.0 2280.0
6717 Zimbabwe ZWE 2017 7655.2163 60760.780 625386.40 472972.25 297975.16 271637.40 55018.605 ... 260584.25 14143.2295 122945.070 630.0 910.0 1800.0 4840.0 1880.0 1050.0 2360.0
6718 Zimbabwe ZWE 2018 7862.5596 57715.004 609545.30 453153.88 303253.00 276985.75 53059.625 ... 263968.06 14170.8080 120112.450 660.0 940.0 1900.0 5080.0 1970.0 1100.0 2470.0
6719 Zimbabwe ZWE 2019 8126.4062 56559.246 600330.00 442257.94 309965.70 283899.03 51181.336 ... 268873.30 13749.4900 119444.164 690.0 960.0 1970.0 5230.0 2010.0 1130.0 2530.0

5550 rows × 32 columns

In [46]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
In [49]:
data_to_scale = merged_df.drop(columns=['Year', 'Entity','Code'])

# Scale the data
scaler = StandardScaler()
merged_df_scaled = scaler.fit_transform(data_to_scale)

# Perform PCA

pca = PCA()
pca_out = pca.fit_transform(merged_df_scaled)
In [50]:
pd.DataFrame({'Center': scaler.mean_
                   , 'Scale': scaler.scale_}
                  , index=data_to_scale.columns)
Out[50]:
Center Scale
low_physical_activity 6.183756e+04 1.931739e+05
non_exclusive_breastfeeding 1.405859e+05 5.474627e+05
air_pollution 1.287456e+06 5.875304e+06
child_wasting 9.337180e+05 4.022350e+06
high_systolic_blood_pressure 1.013699e+06 3.894411e+06
high_fasting_plasma_glucose 6.231589e+05 2.323884e+06
child_stunting 2.162215e+05 1.033723e+06
high_body_mass_index 5.675718e+05 1.767211e+06
secondhand_smoke 2.069046e+05 9.895219e+05
unsafe_sanitation 4.167442e+05 2.100466e+06
unsafe_water_source 5.797473e+05 2.858931e+06
diet_low_in_vegetables 6.166332e+04 2.466170e+05
diet_low_in_fruits 1.318412e+05 6.228871e+05
diet_high_in_sodium 2.026395e+05 1.340764e+06
drug_use 1.346634e+05 5.809032e+05
household_air_pollution 7.858722e+05 3.737993e+06
high_ldl_cholesterol 4.403610e+05 1.593582e+06
iron_deficiency 1.611358e+05 9.209309e+05
zinc_deficiency 4.364368e+03 2.376293e+04
smoking 9.660387e+05 4.215962e+06
vitamin_a_deficiency 5.607599e+04 2.574250e+05
ambient_particulate_matter_pollution 4.874069e+05 2.429460e+06
substance_use_disorders 1.230150e+03 9.888776e+02
skin_and_subcutaneous_diseases 1.592032e+03 7.186604e+02
musculoskeletal_disorders 4.872905e+03 3.159057e+03
neoplasms 9.285721e+03 6.243723e+03
neurological_disorders 3.554023e+03 1.766960e+03
sense_organ_diseases 1.808128e+03 7.968354e+02
mental_disorders 4.833573e+03 2.402449e+03
In [51]:
print("Number of Principal Components:", pca.n_components_)
Number of Principal Components: 29
In [52]:
pca.explained_variance_
Out[52]:
array([1.54999845e+01, 6.53276398e+00, 3.61909967e+00, 7.32560579e-01,
       6.25741276e-01, 3.93517665e-01, 3.34616031e-01, 2.91070803e-01,
       2.35397005e-01, 1.73817632e-01, 1.35456760e-01, 8.71869269e-02,
       8.19874773e-02, 6.25877466e-02, 5.93653146e-02, 4.58567443e-02,
       3.21872375e-02, 1.63381244e-02, 1.46354767e-02, 8.10614164e-03,
       6.74050510e-03, 4.71839577e-03, 3.51122148e-03, 2.72828411e-03,
       2.00595908e-03, 1.77739354e-03, 1.18237924e-03, 2.81399207e-04,
       3.56051580e-06])
In [53]:
components_df = pd.DataFrame(pca.components_.T, index=data_to_scale.columns, columns=[f'PC{i+1}' for i in range(pca.n_components_)])

# Display the principal components DataFrame
components_df
Out[53]:
PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10 ... PC20 PC21 PC22 PC23 PC24 PC25 PC26 PC27 PC28 PC29
low_physical_activity 0.212415 0.145885 -0.118299 0.148716 0.176017 0.174498 -0.259750 -0.065327 -0.020123 0.119085 ... -0.046319 -0.171483 0.024133 -0.176680 -0.054725 -0.033785 0.098384 0.040547 0.032751 0.001869
non_exclusive_breastfeeding 0.198540 -0.157623 0.172540 -0.070970 -0.207033 0.131166 -0.009283 0.053285 0.258481 0.198225 ... -0.094732 0.090772 0.210018 0.178297 0.126202 -0.013032 -0.091051 -0.016574 -0.057973 0.001013
air_pollution 0.249433 -0.004436 0.010712 -0.141177 -0.026108 -0.127332 0.169233 0.013836 -0.062056 0.041759 ... -0.157459 -0.168074 0.126273 0.066974 -0.207898 -0.173945 -0.074675 -0.028926 -0.001878 -0.794811
child_wasting 0.203092 -0.165662 0.209707 -0.032775 -0.132708 0.065979 -0.026060 0.028135 0.068973 0.031481 ... 0.005129 0.060770 -0.038105 0.154870 -0.270562 0.562551 0.530557 0.051779 0.027733 -0.006829
high_systolic_blood_pressure 0.224138 0.133343 -0.155266 -0.008425 -0.019690 -0.005114 -0.102263 0.013279 -0.153901 -0.086560 ... 0.187753 -0.216277 0.147582 -0.015253 0.113661 -0.019206 0.178150 -0.768055 -0.081600 -0.021155
high_fasting_plasma_glucose 0.227395 0.129165 -0.125603 0.042586 0.135173 0.013292 -0.094272 -0.032811 -0.021061 -0.039968 ... -0.297484 0.246951 0.036509 0.090767 0.373547 -0.250640 0.413664 0.108037 0.030920 0.007607
child_stunting 0.193075 -0.173807 0.230871 -0.024464 -0.172057 0.095256 -0.088729 0.035597 0.030853 -0.008793 ... -0.193822 -0.320524 -0.420581 -0.377370 0.467569 0.056407 -0.180505 -0.016996 -0.060198 0.001799
high_body_mass_index 0.197867 0.165229 -0.168983 0.247438 0.102253 0.236677 -0.291714 -0.025623 0.071945 -0.025535 ... 0.128533 0.253420 0.228727 -0.135125 -0.187492 0.161851 -0.445071 0.020895 -0.054178 0.000004
secondhand_smoke 0.231702 0.081320 -0.097590 -0.213587 -0.153461 -0.098731 0.252120 0.036216 0.067969 0.104993 ... 0.379617 0.548524 -0.220425 -0.109400 0.148446 -0.141263 0.062158 -0.099244 0.114544 0.001678
unsafe_sanitation 0.200048 -0.162379 0.230774 0.045155 0.011008 0.028360 -0.085475 0.004197 -0.061002 -0.032213 ... 0.219704 0.200956 -0.119401 -0.120253 -0.164605 -0.339413 0.020996 0.112537 -0.611853 -0.018569
unsafe_water_source 0.202230 -0.159259 0.225930 0.053575 0.044742 0.015358 -0.081226 0.001301 -0.052449 -0.025387 ... 0.131142 0.051927 -0.076663 -0.061478 -0.114225 -0.198416 -0.134605 -0.038397 0.756960 0.021609
diet_low_in_vegetables 0.223073 -0.003001 0.061173 0.199295 0.422392 -0.189245 0.256951 -0.056530 0.259988 0.044845 ... 0.133436 -0.211880 -0.292306 0.127354 -0.146075 -0.047563 -0.046581 0.109079 -0.028864 -0.005125
diet_low_in_fruits 0.237983 0.091141 -0.098777 -0.049547 0.048964 -0.155906 0.173768 -0.007815 -0.048272 -0.025902 ... -0.426286 0.222439 0.056097 -0.585453 -0.192638 0.246134 0.017965 0.013297 0.010793 0.019107
diet_high_in_sodium 0.179092 0.152992 -0.227137 -0.321276 -0.339342 -0.039052 0.018138 0.073114 -0.173166 -0.059617 ... 0.383996 -0.288583 0.151559 -0.104752 0.078121 0.129316 -0.046616 0.456657 0.017031 0.001244
drug_use 0.200096 0.138842 -0.136886 0.133721 -0.158013 0.306864 0.092627 -0.056794 0.593269 0.081350 ... 0.120283 -0.192674 -0.048956 -0.028293 -0.031681 -0.006945 0.132248 -0.025997 0.016589 -0.000058
household_air_pollution 0.236732 -0.074069 0.099677 -0.143585 -0.089884 -0.119524 0.258922 0.022198 0.097320 0.065549 ... -0.230638 -0.173739 0.442174 0.045945 -0.127933 -0.290716 -0.091767 -0.015916 -0.028708 0.506683
high_ldl_cholesterol 0.221231 0.128952 -0.130133 0.169879 0.083725 0.036517 -0.152345 -0.017435 -0.188052 -0.095517 ... -0.052312 -0.074930 0.034550 0.180503 0.130587 -0.176917 0.183178 0.364620 0.080789 0.002359
iron_deficiency 0.222299 -0.059561 0.128716 0.096306 0.389170 -0.237284 0.204726 -0.058113 -0.055659 0.024745 ... 0.200753 0.019989 0.255890 0.108753 0.445703 0.399599 -0.163575 0.016476 -0.068514 -0.002163
zinc_deficiency 0.179360 -0.170202 0.241040 0.061467 -0.017952 0.047043 -0.173944 -0.009361 -0.202308 -0.141467 ... -0.001664 0.020548 0.038282 -0.012709 -0.048315 0.050459 0.032140 0.012431 -0.018547 -0.001104
smoking 0.212048 0.153815 -0.172452 -0.102328 -0.189664 0.051753 -0.036699 -0.024185 -0.010133 -0.031165 ... -0.332536 0.161181 -0.297513 0.504077 0.036267 0.161721 -0.377227 -0.057790 -0.054894 0.003514
vitamin_a_deficiency 0.178420 -0.188677 0.228016 0.018896 -0.129539 0.104102 -0.191474 -0.007663 -0.137638 -0.154974 ... 0.062788 0.066408 0.120335 0.147713 -0.023945 -0.007193 -0.061749 -0.043667 0.023674 0.001027
ambient_particulate_matter_pollution 0.230421 0.097758 -0.120554 -0.113656 0.075978 -0.119461 0.002449 0.001261 -0.297623 -0.003130 ... 0.027707 -0.164551 -0.368588 0.120523 -0.305179 0.024896 -0.015329 -0.088753 -0.051579 0.331291
substance_use_disorders -0.009187 0.253340 0.152362 0.655318 -0.371437 0.046692 0.428561 0.211380 -0.213386 -0.183106 ... -0.005056 0.006671 -0.002515 0.009304 0.007876 -0.004027 0.002399 -0.005918 0.000710 -0.000061
skin_and_subcutaneous_diseases -0.026409 0.286467 0.254842 -0.308609 0.135932 0.182479 0.156102 -0.223061 0.131008 -0.713949 ... -0.015133 0.017389 0.010965 -0.000632 -0.002133 -0.002841 0.001832 -0.003471 0.000801 0.000058
musculoskeletal_disorders -0.020727 0.316480 0.269477 -0.066615 0.016980 0.120133 -0.088698 -0.259265 0.090455 0.068288 ... 0.034514 -0.014673 0.013300 -0.027494 -0.013693 -0.009945 -0.010569 0.006906 0.000857 -0.000589
neoplasms -0.022955 0.290043 0.219899 0.113174 -0.278239 -0.534592 -0.247566 -0.448990 0.062620 0.185613 ... -0.004445 0.016030 0.008622 -0.004350 0.004853 -0.002982 0.018111 -0.004956 0.000813 0.000295
neurological_disorders -0.034357 0.315475 0.279734 -0.045605 0.000134 -0.007555 -0.045102 -0.043465 -0.083081 0.200422 ... 0.033316 -0.041694 -0.005462 0.011084 -0.019953 0.013155 -0.014659 0.009137 -0.001405 -0.000197
sense_organ_diseases 0.003371 0.302272 0.197044 -0.087097 0.091879 -0.287581 -0.299995 0.767711 0.251729 -0.093977 ... -0.015350 0.003857 -0.001763 0.011869 -0.010223 0.002640 -0.000232 -0.000998 0.000685 -0.000351
mental_disorders -0.033414 0.283241 0.262180 -0.190954 0.185897 0.423681 0.196368 0.127084 -0.298983 0.466906 ... -0.029407 0.017335 0.000300 0.013593 0.021954 0.009086 0.009116 -0.005454 -0.001966 0.000614

29 rows × 29 columns

In [54]:
from adjustText import adjust_text

np.random.seed(0)
colors = np.random.rand(len(merged_df['Entity'].unique()))

# Create the plot
fig, ax1 = plt.subplots(figsize=(14, 10))

ax1.set_xlim(-3.5, 3.5)
ax1.set_ylim(-3.5, 3.5)

texts = []
# Plot country names for PC1 and PC2
for i, label in enumerate(merged_df['Entity'].unique().tolist()):
    x = pca_out[i, 0]
    y = pca_out[i, 1]
    ax1.scatter(x, y, color=plt.cm.viridis(colors[i]), s=50, alpha=0.8, edgecolor='w', linewidth=0.5)
    texts.append(ax1.text(x, y, label, fontsize=8))

adjust_text(texts, arrowprops=dict(arrowstyle='-', color='gray', lw=0.5))

ax1.set_xlabel("PC1", fontsize=14)
ax1.set_ylabel("PC2", fontsize=14)
ax1.set_title("PCA of Countries Based on Health Metrics", fontsize=16)
ax1.grid(True)

plt.savefig('PCA_of_Countries_Health_Metrics.png', dpi=300, bbox_inches='tight')

plt.show()
In [55]:
merged_df_scaled =  pd.DataFrame(merged_df_scaled,columns= data_to_scale.columns)
merged_df_scaled
Out[55]:
low_physical_activity non_exclusive_breastfeeding air_pollution child_wasting high_systolic_blood_pressure high_fasting_plasma_glucose child_stunting high_body_mass_index secondhand_smoke unsafe_sanitation ... smoking vitamin_a_deficiency ambient_particulate_matter_pollution substance_use_disorders skin_and_subcutaneous_diseases musculoskeletal_disorders neoplasms neurological_disorders sense_organ_diseases mental_disorders
0 -0.000608 0.103137 0.118944 0.192668 -0.089904 -0.134680 0.439030 -0.150726 0.047507 -0.079876 ... -0.194424 0.497516 -0.141747 -1.051849 -1.630857 -1.244962 -0.963483 -1.286970 -1.453409 -1.304325
1 0.001833 0.149599 0.133095 0.210161 -0.088015 -0.130092 0.456321 -0.150468 0.056777 -0.060155 ... -0.193904 0.515971 -0.139658 -1.051849 -1.630857 -1.251293 -0.985906 -1.292629 -1.478508 -1.291837
2 0.007701 0.239284 0.172085 0.266452 -0.084179 -0.123805 0.514585 -0.148278 0.085124 -0.026027 ... -0.192998 0.538495 -0.133484 -1.041736 -1.603028 -1.257624 -1.009930 -1.292629 -1.516157 -1.266863
3 0.015717 0.348322 0.215868 0.356225 -0.079089 -0.116708 0.625406 -0.145777 0.120734 0.030372 ... -0.191851 0.617026 -0.126287 -1.031624 -1.616942 -1.267120 -1.032352 -1.309607 -1.553806 -1.271025
4 0.023888 0.365613 0.241632 0.404111 -0.074003 -0.109807 0.722712 -0.144219 0.143715 0.032545 ... -0.190694 0.738040 -0.122095 -1.041736 -1.630857 -1.282948 -1.054775 -1.332245 -1.591455 -1.300162
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
5545 -0.282857 -0.136839 -0.107917 -0.109179 -0.186285 -0.155557 -0.147468 -0.212618 -0.133415 -0.120947 ... -0.168821 -0.142308 -0.145994 -0.657462 -1.032522 -1.023377 -0.777696 -1.020976 -1.039271 -1.100366
5546 -0.281678 -0.142312 -0.109873 -0.110494 -0.184934 -0.153120 -0.152156 -0.208528 -0.133217 -0.122793 ... -0.168016 -0.155858 -0.147680 -0.637237 -0.990777 -0.998053 -0.742461 -0.981360 -0.989072 -1.062904
5547 -0.280485 -0.145809 -0.112687 -0.114546 -0.183782 -0.151265 -0.155944 -0.204645 -0.133910 -0.124099 ... -0.167329 -0.162893 -0.150018 -0.606900 -0.949033 -0.972728 -0.712030 -0.947403 -0.951423 -1.029605
5548 -0.279411 -0.151373 -0.115383 -0.119473 -0.182427 -0.148963 -0.157839 -0.200608 -0.134524 -0.127986 ... -0.166527 -0.162786 -0.151184 -0.576562 -0.907289 -0.941073 -0.673592 -0.896468 -0.888675 -0.983818
5549 -0.278046 -0.153484 -0.116952 -0.122182 -0.180703 -0.145988 -0.159656 -0.195987 -0.134189 -0.130251 ... -0.165363 -0.164423 -0.151459 -0.546225 -0.879459 -0.918915 -0.649568 -0.873831 -0.851026 -0.958844

5550 rows × 29 columns

In [56]:
sns.set(style='whitegrid')
fig, ax = plt.subplots(1, 2, figsize=(16, 7), dpi=120)

sns.histplot(merged_df_scaled[merged_df_scaled.columns[0]], kde=True, color='skyblue', ax=ax[0], binwidth=0.5)
ax[0].set_title('Distribution of First Principal Component', fontsize=14)
ax[0].set_xlabel('Component Value', fontsize=12)
ax[0].set_ylabel('Frequency', fontsize=12)
ax[0].set_ylim(0, 100)  # Adjusted y-axis range

sns.histplot(merged_df_scaled[merged_df_scaled.columns[1]], kde=True, color='salmon', ax=ax[1], binwidth=0.5)
ax[1].set_title('Distribution of Second Principal Component', fontsize=14)
ax[1].set_xlabel('Component Value', fontsize=12)
ax[1].set_ylabel('Frequency', fontsize=12)
ax[1].set_ylim(0, 100)  # Adjusted y-axis range

# Adding a vertical line for the mean
mean_val_0 = merged_df_scaled[merged_df_scaled.columns[0]].mean()
mean_val_1 = merged_df_scaled[merged_df_scaled.columns[1]].mean()
ax[0].axvline(mean_val_0, color='red', linestyle='dashed', linewidth=1)
ax[1].axvline(mean_val_1, color='red', linestyle='dashed', linewidth=1)

# Add text annotation for mean
ax[0].text(mean_val_0 + 0.5, 95, f'Mean: {mean_val_0:.2f}', color = 'red', fontsize=10)
ax[1].text(mean_val_1 + 0.5, 95, f'Mean: {mean_val_1:.2f}', color = 'red', fontsize=10)

plt.tight_layout()
plt.show()
In [57]:
# Plotting the explained variance by PCA
plt.figure(figsize=(8, 6))
plt.bar(range(len(pca.explained_variance_ratio_)), pca.explained_variance_ratio_)
plt.xlabel('Principal Components')
plt.ylabel('Variance Explained')
plt.title('Explained Variance by PCA Components')
plt.show()
In [58]:
pca.explained_variance_ratio_
Out[58]:
array([5.34385920e-01, 2.25227135e-01, 1.24774054e-01, 2.52561582e-02,
       2.15733976e-02, 1.35671297e-02, 1.15364048e-02, 1.00351158e-02,
       8.11567557e-03, 5.99263152e-03, 4.67008115e-03, 3.00590405e-03,
       2.82664499e-03, 2.15780929e-03, 2.04671097e-03, 1.58098213e-03,
       1.10970476e-03, 5.63282090e-04, 5.04580679e-04, 2.79471761e-04,
       2.32389331e-04, 1.62673986e-04, 1.21054787e-04, 9.40618112e-05,
       6.91585394e-05, 6.12783892e-05, 4.07643518e-05, 9.70167258e-06,
       1.22754285e-07])
In [59]:
# Plot of proportion of variance explained
fig, ax = plt.subplots(1, 2, figsize=(15, 5))

# Plot of proportion of variance explained
ax[0].plot(range(1, 30), pca.explained_variance_ratio_, marker='o')
ax[0].set_xlabel('Principal Component')
ax[0].set_ylabel('Proportion of Variance Explained')
ax[0].set_ylim(0, 1.03)
ax[0].set_xticks(range(1, 30))

# Plot of cumulative proportion of variance explained
ax[1].plot(range(1, 30), np.cumsum(pca.explained_variance_ratio_), marker='o')
ax[1].set_xlabel('Principal Component')
ax[1].set_ylabel('Cumulative Proportion of Variance Explained')
ax[1].set_ylim(0, 1.03)
ax[1].set_xticks(range(1, 30))

plt.tight_layout()
plt.show()
In [60]:
numeric_columns = merged_df_scaled.select_dtypes(include=['float64', 'int64']).columns
merged_numeric = merged_df_scaled[numeric_columns]
 
# Standardize the numeric data
scaler = StandardScaler()
merged_numeric_scaled = scaler.fit_transform(merged_numeric)
 
# Perform PCA to reduce to 4 components
pca = PCA(n_components=4)
pca_out = pca.fit_transform(merged_numeric_scaled)
 
# Print PCA components' loadings
loadings = pd.DataFrame(pca.components_.T, columns=[f'PC{i+1}' for i in range(4)], index=numeric_columns)
print("PCA Loadings:")
print(loadings)
PCA Loadings:
                                           PC1       PC2       PC3       PC4
low_physical_activity                 0.212415  0.145885 -0.118299  0.148719
non_exclusive_breastfeeding           0.198540 -0.157623  0.172540 -0.070970
air_pollution                         0.249433 -0.004436  0.010712 -0.141176
child_wasting                         0.203092 -0.165662  0.209707 -0.032775
high_systolic_blood_pressure          0.224138  0.133343 -0.155266 -0.008427
high_fasting_plasma_glucose           0.227395  0.129165 -0.125603  0.042588
child_stunting                        0.193075 -0.173807  0.230871 -0.024464
high_body_mass_index                  0.197867  0.165229 -0.168983  0.247438
secondhand_smoke                      0.231702  0.081320 -0.097590 -0.213588
unsafe_sanitation                     0.200048 -0.162379  0.230774  0.045156
unsafe_water_source                   0.202230 -0.159259  0.225930  0.053575
diet_low_in_vegetables                0.223073 -0.003001  0.061173  0.199293
diet_low_in_fruits                    0.237983  0.091141 -0.098777 -0.049549
diet_high_in_sodium                   0.179092  0.152992 -0.227137 -0.321276
drug_use                              0.200096  0.138842 -0.136886  0.133722
household_air_pollution               0.236732 -0.074069  0.099677 -0.143585
high_ldl_cholesterol                  0.221231  0.128952 -0.130133  0.169875
iron_deficiency                       0.222299 -0.059561  0.128716  0.096307
zinc_deficiency                       0.179360 -0.170202  0.241040  0.061468
smoking                               0.212048  0.153815 -0.172452 -0.102329
vitamin_a_deficiency                  0.178420 -0.188677  0.228016  0.018895
ambient_particulate_matter_pollution  0.230421  0.097758 -0.120554 -0.113654
substance_use_disorders              -0.009187  0.253340  0.152362  0.655319
skin_and_subcutaneous_diseases       -0.026409  0.286467  0.254842 -0.308608
musculoskeletal_disorders            -0.020727  0.316480  0.269477 -0.066616
neoplasms                            -0.022955  0.290043  0.219899  0.113176
neurological_disorders               -0.034357  0.315475  0.279734 -0.045609
sense_organ_diseases                  0.003371  0.302272  0.197044 -0.087096
mental_disorders                     -0.033414  0.283241  0.262180 -0.190953
In [64]:
merged_df.columns
Out[64]:
Index(['Entity', 'Code_x', 'Year', 'low_physical_activity',
       'non_exclusive_breastfeeding', 'air_pollution', 'child_wasting',
       'high_systolic_blood_pressure', 'high_fasting_plasma_glucose',
       'child_stunting', 'high_body_mass_index', 'secondhand_smoke',
       'unsafe_sanitation', 'unsafe_water_source', 'diet_low_in_vegetables',
       'diet_low_in_fruits', 'diet_high_in_sodium', 'drug_use',
       'household_air_pollution', 'high_ldl_cholesterol', 'iron_deficiency',
       'zinc_deficiency', 'smoking', 'vitamin_a_deficiency',
       'ambient_particulate_matter_pollution', 'Code_y',
       'substance_use_disorders', 'skin_and_subcutaneous_diseases',
       'musculoskeletal_disorders', 'neoplasms', 'neurological_disorders',
       'sense_organ_diseases', 'mental_disorders'],
      dtype='object')
In [65]:
features = [
    'low_physical_activity', 'non_exclusive_breastfeeding', 'air_pollution', 
    'child_wasting', 'high_systolic_blood_pressure', 'high_fasting_plasma_glucose', 
    'high_body_mass_index', 'secondhand_smoke', 'unsafe_sanitation', 'unsafe_water_source', 
    'diet_low_in_vegetables', 'diet_low_in_fruits', 'diet_high_in_sodium', 'drug_use', 
    'household_air_pollution', 'high_ldl_cholesterol', 'iron_deficiency', 'zinc_deficiency', 
    'smoking', 'vitamin_a_deficiency', 'ambient_particulate_matter_pollution', 
    'substance_use_disorders', 'skin_and_subcutaneous_diseases', 'musculoskeletal_disorders', 
    'neoplasms', 'neurological_disorders', 'sense_organ_diseases', 'child_stunting']

target = 'mental_disorders'

# Normalize the dataset
scaler = MinMaxScaler()
merged_df[features] = scaler.fit_transform(merged_df[features])
merged_df[target] = scaler.fit_transform(merged_df[[target]])
In [66]:
merged_df
Out[66]:
Entity Code_x Year low_physical_activity non_exclusive_breastfeeding air_pollution child_wasting high_systolic_blood_pressure high_fasting_plasma_glucose child_stunting ... vitamin_a_deficiency ambient_particulate_matter_pollution Code_y substance_use_disorders skin_and_subcutaneous_diseases musculoskeletal_disorders neoplasms neurological_disorders sense_organ_diseases mental_disorders
0 Afghanistan AFG 1990 0.024609 0.024400 0.028845 0.022883 0.012181 0.009364 0.035928 ... 0.038347 0.004351 AFG 0.022659 0.087404 0.058946 0.092971 0.135356 0.139759 0.109306
1 Afghanistan AFG 1991 0.024797 0.027550 0.030052 0.023825 0.012316 0.009686 0.036887 ... 0.039336 0.004506 AFG 0.022659 0.087404 0.057559 0.088498 0.134189 0.134940 0.111521
2 Afghanistan AFG 1992 0.025249 0.033629 0.033379 0.026857 0.012591 0.010128 0.040116 ... 0.040544 0.004962 AFG 0.024169 0.092545 0.056172 0.083706 0.134189 0.127711 0.115953
3 Afghanistan AFG 1993 0.025867 0.041021 0.037115 0.031693 0.012955 0.010626 0.046259 ... 0.044754 0.005494 AFG 0.025680 0.089974 0.054092 0.079233 0.130688 0.120482 0.115214
4 Afghanistan AFG 1994 0.026496 0.042193 0.039313 0.034272 0.013319 0.011111 0.051652 ... 0.051241 0.005804 AFG 0.024169 0.087404 0.050624 0.074760 0.126021 0.113253 0.110044
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
5545 Zimbabwe ZWE 2015 0.002861 0.008132 0.009488 0.006623 0.005287 0.007898 0.003420 ... 0.004049 0.004037 ZWE 0.081571 0.197943 0.107490 0.130032 0.190198 0.219277 0.145495
5546 Zimbabwe ZWE 2016 0.002952 0.007761 0.009321 0.006552 0.005383 0.008070 0.003160 ... 0.003322 0.003913 ZWE 0.084592 0.205656 0.113037 0.137061 0.198366 0.228916 0.152142
5547 Zimbabwe ZWE 2017 0.003044 0.007524 0.009081 0.006334 0.005466 0.008200 0.002950 ... 0.002945 0.003740 ZWE 0.089124 0.213368 0.118585 0.143131 0.205368 0.236145 0.158050
5548 Zimbabwe ZWE 2018 0.003127 0.007147 0.008851 0.006068 0.005563 0.008361 0.002845 ... 0.002951 0.003654 ZWE 0.093656 0.221080 0.125520 0.150799 0.215869 0.248193 0.166174
5549 Zimbabwe ZWE 2019 0.003232 0.007003 0.008717 0.005923 0.005686 0.008570 0.002744 ... 0.002863 0.003633 ZWE 0.098187 0.226221 0.130374 0.155591 0.220537 0.255422 0.170606

5550 rows × 33 columns

In [67]:
pca = PCA(n_components=4)  # Select number of components
merged_df_pca = pca.fit_transform(merged_dfdf.drop(columns=['Entity','Year','Code_x','Code_y']))
merged_df_pca
Out[67]:
array([[-0.64612718, -0.0248205 , -0.04948753,  0.03328347],
       [-0.65036896, -0.02247287, -0.04492744,  0.03350559],
       [-0.65257246, -0.01691882, -0.03583051,  0.03512559],
       ...,
       [-0.45180058, -0.06040674, -0.05563566,  0.00464654],
       [-0.42997334, -0.05967361, -0.0546987 ,  0.00425658],
       [-0.41675952, -0.05889445, -0.0550508 ,  0.00457749]])
In [68]:
column_names = [f'PC{i}' for i in range(1, merged_df_pca.shape[1] + 1)]

# Create a new DataFrame
pca_df = pd.DataFrame(data=merged_df_pca, columns=column_names)
In [69]:
pca_df
Out[69]:
PC1 PC2 PC3 PC4
0 -0.646127 -0.024821 -0.049488 0.033283
1 -0.650369 -0.022473 -0.044927 0.033506
2 -0.652572 -0.016919 -0.035831 0.035126
3 -0.661566 -0.008751 -0.028581 0.042535
4 -0.673319 -0.004180 -0.025247 0.046511
... ... ... ... ...
5545 -0.486504 -0.061376 -0.057477 0.005150
5546 -0.468311 -0.060805 -0.056375 0.004515
5547 -0.451801 -0.060407 -0.055636 0.004647
5548 -0.429973 -0.059674 -0.054699 0.004257
5549 -0.416760 -0.058894 -0.055051 0.004577

5550 rows × 4 columns

In [70]:
X = pca_df.values  # Features (PC components)
y = merged_df['mental_disorders'].values.ravel()  # Target variable
In [71]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

SVR¶

In [14]:
param_grid_rbf = {'C': [0.01, 0.1, 1, 10,50,100], 'gamma': [0.01, 0.1, 1, 10,50,100]}

# Initialize GridSearchCV with RBF kernel
grid_search_rbf = GridSearchCV(SVR(kernel='rbf', verbose=True), param_grid_rbf, scoring='neg_mean_squared_error', cv=10, n_jobs=-1)

# Fit the GridSearchCV on the training data
grid_search_rbf.fit(X_train, y_train)

# Get the best parameters and the best estimator
best_params_rbf = grid_search_rbf.best_params_
best_estimator_rbf = grid_search_rbf.best_estimator_

print("Best parameters for RBF kernel:", best_params_rbf)

# Evaluate the best RBF model on the test set
accuracy_rbf = best_estimator_rbf.score(X_test, y_test)
print(f"Accuracy with the best RBF model: {accuracy_rbf * 100:.2f}%")
.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1103
obj = -1.463646, rho = -0.324476
nSV = 1438, nBSV = 1438
....
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1095

Warning: using -h 0 may be faster
*
optimization finished, #iter = 1111
obj = -1.450658, rho = -0.325477
nSV = 1438, nBSV = 1438

Warning: using -h 0 may be faster
*obj = -1.462666, rho = -0.325532
nSV = 1450, nBSV = 1450

optimization finished, #iter = 1110
obj = -1.468013, rho = -0.325201
nSV = 1446, nBSV = 1446

Warning: using -h 0 may be faster
*
optimization finished, #iter = 1111
obj = -1.481792, rho = -0.325563
nSV = 1454, nBSV = 1454
.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1120
.obj = -1.487925, rho = -0.325715
nSV = 1452, nBSV = 1452
.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1111
obj = -1.472229, rho = -0.326717
nSV = 1454, nBSV = 1454
.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1108
obj = -1.462636, rho = -0.324253
nSV = 1444, nBSV = 1444

Warning: using -h 0 may be faster
*
optimization finished, #iter = 1108
obj = -1.474819, rho = -0.328993
nSV = 1466, nBSV = 1466
.*
optimization finished, #iter = 510
obj = -0.510583, rho = -0.337970
nSV = 627, nBSV = 624

Warning: using -h 0 may be faster
*
optimization finished, #iter = 1111
obj = -1.468520, rho = -0.325974
nSV = 1456, nBSV = 1456
*
optimization finished, #iter = 497
obj = -0.498536, rho = -0.337768
nSV = 606, nBSV = 606
*
optimization finished, #iter = 510
obj = -0.508828, rho = -0.336354
nSV = 621, nBSV = 619
*
optimization finished, #iter = 510
obj = -0.501838, rho = -0.336550
nSV = 617, nBSV = 614
*
optimization finished, #iter = 493
obj = -0.498506, rho = -0.339346
nSV = 614, nBSV = 614
*
optimization finished, #iter = 500
obj = -0.488937, rho = -0.341242
nSV = 604, nBSV = 604
*
optimization finished, #iter = 513
obj = -0.496468, rho = -0.335430
nSV = 604, nBSV = 601
*
optimization finished, #iter = 501
obj = -0.499688, rho = -0.338540
nSV = 612, nBSV = 612
*
optimization finished, #iter = 253
obj = -0.274006, rho = -0.356846
nSV = 435, nBSV = 429
*
optimization finished, #iter = 499
obj = -0.494124, rho = -0.336779
nSV = 608, nBSV = 608
*
optimization finished, #iter = 247
obj = -0.264562, rho = -0.352545
nSV = 425, nBSV = 419
*
optimization finished, #iter = 251
obj = -0.265863, rho = -0.348124
nSV = 421, nBSV = 417
*
optimization finished, #iter = 502
obj = -0.497957, rho = -0.335812
nSV = 613, nBSV = 611
*
optimization finished, #iter = 251
obj = -0.266630, rho = -0.351611
nSV = 428, nBSV = 422
*
optimization finished, #iter = 252
obj = -0.261688, rho = -0.354702
nSV = 427, nBSV = 419
*
optimization finished, #iter = 262
obj = -0.266561, rho = -0.351533
nSV = 433, nBSV = 428
*
optimization finished, #iter = 262
obj = -0.271855, rho = -0.352938
nSV = 443, nBSV = 436
*
optimization finished, #iter = 249
obj = -0.262970, rho = -0.353176
nSV = 428, nBSV = 424
*
optimization finished, #iter = 249
obj = -0.267444, rho = -0.351334
nSV = 433, nBSV = 426
*
optimization finished, #iter = 244
obj = -0.266746, rho = -0.354738
nSV = 427, nBSV = 421
*
optimization finished, #iter = 251
obj = -0.269414, rho = -0.359860
nSV = 414, nBSV = 394
*
optimization finished, #iter = 256
obj = -0.275997, rho = -0.357632
nSV = 421, nBSV = 397
*
optimization finished, #iter = 261
obj = -0.273073, rho = -0.357131
nSV = 416, nBSV = 394
*
optimization finished, #iter = 273
obj = -0.277697, rho = -0.362180
nSV = 430, nBSV = 406
*
optimization finished, #iter = 259
obj = -0.277492, rho = -0.359869
nSV = 428, nBSV = 404
*
optimization finished, #iter = 261
obj = -0.273198, rho = -0.362624
nSV = 425, nBSV = 401
*
optimization finished, #iter = 263
obj = -0.267990, rho = -0.361797
nSV = 422, nBSV = 395
*
optimization finished, #iter = 254
obj = -0.272911, rho = -0.358676
nSV = 420, nBSV = 398
*
optimization finished, #iter = 259
obj = -0.270906, rho = -0.360268
nSV = 415, nBSV = 396
*
optimization finished, #iter = 261
obj = -0.271329, rho = -0.358869
nSV = 424, nBSV = 401
*
optimization finished, #iter = 332
obj = -0.426746, rho = -0.373948
nSV = 546, nBSV = 486
*
optimization finished, #iter = 314
obj = -0.416846, rho = -0.372208
nSV = 532, nBSV = 479
*
optimization finished, #iter = 318
obj = -0.426335, rho = -0.372414
nSV = 529, nBSV = 474
*
optimization finished, #iter = 306
obj = -0.419990, rho = -0.373260
nSV = 527, nBSV = 473
*
optimization finished, #iter = 313
obj = -0.423256, rho = -0.371785
nSV = 535, nBSV = 480
*
optimization finished, #iter = 317
obj = -0.428845, rho = -0.373725
nSV = 539, nBSV = 487
*
optimization finished, #iter = 311
obj = -0.419347, rho = -0.376486
nSV = 527, nBSV = 482
*
optimization finished, #iter = 312
obj = -0.423435, rho = -0.373441
nSV = 530, nBSV = 479
*
optimization finished, #iter = 319
obj = -0.420396, rho = -0.372488
nSV = 533, nBSV = 481
*
optimization finished, #iter = 312
obj = -0.415093, rho = -0.372888
nSV = 526, nBSV = 472
*
optimization finished, #iter = 373
obj = -0.543907, rho = -0.379507
nSV = 647, nBSV = 580
*
optimization finished, #iter = 387
obj = -0.552731, rho = -0.378834
nSV = 646, nBSV = 573
*
optimization finished, #iter = 388
obj = -0.557790, rho = -0.381246
nSV = 658, nBSV = 596
*
optimization finished, #iter = 386
obj = -0.542286, rho = -0.379888
nSV = 638, nBSV = 570
*
optimization finished, #iter = 404
obj = -0.558065, rho = -0.380964
nSV = 659, nBSV = 587
*
optimization finished, #iter = 389
obj = -0.550958, rho = -0.380226
nSV = 662, nBSV = 585
*
optimization finished, #iter = 377
obj = -0.551317, rho = -0.381702
nSV = 646, nBSV = 581
*
optimization finished, #iter = 392
obj = -0.545092, rho = -0.380999
nSV = 645, nBSV = 570
*
optimization finished, #iter = 387
obj = -0.546694, rho = -0.379935
nSV = 647, nBSV = 577
*
optimization finished, #iter = 267
obj = -2.317302, rho = -0.442589
nSV = 425, nBSV = 419
*
optimization finished, #iter = 258
obj = -2.236913, rho = -0.436204
nSV = 406, nBSV = 401
*
optimization finished, #iter = 510
obj = -4.852408, rho = -0.343128
nSV = 601, nBSV = 599
*
optimization finished, #iter = 522
obj = -4.835956, rho = -0.338642
nSV = 588, nBSV = 588
*
optimization finished, #iter = 522
obj = -4.758874, rho = -0.336792
nSV = 590, nBSV = 588
*
optimization finished, #iter = 525
obj = -4.974817, rho = -0.342755
nSV = 614, nBSV = 611
*
optimization finished, #iter = 262
obj = -2.236312, rho = -0.441505
nSV = 413, nBSV = 409
*
optimization finished, #iter = 516
obj = -4.810248, rho = -0.339991
nSV = 594, nBSV = 594
*
optimization finished, #iter = 388
obj = -0.544754, rho = -0.383938
nSV = 652, nBSV = 575
*
optimization finished, #iter = 269
obj = -2.199047, rho = -0.443960
nSV = 408, nBSV = 403
*
optimization finished, #iter = 265
obj = -2.259902, rho = -0.434272
nSV = 418, nBSV = 415
*
optimization finished, #iter = 253
obj = -2.242809, rho = -0.434212
nSV = 412, nBSV = 408
*
optimization finished, #iter = 269
obj = -2.232987, rho = -0.439116
nSV = 418, nBSV = 410
*
optimization finished, #iter = 261
obj = -2.308076, rho = -0.440123
nSV = 428, nBSV = 419
*
optimization finished, #iter = 503
obj = -4.852050, rho = -0.343142
nSV = 596, nBSV = 594
*
optimization finished, #iter = 519
obj = -4.959608, rho = -0.339967
nSV = 609, nBSV = 607
*
optimization finished, #iter = 276
obj = -1.474786, rho = -0.396544
nSV = 355, nBSV = 340
*
optimization finished, #iter = 260
obj = -2.197627, rho = -0.434588
nSV = 410, nBSV = 402
*
optimization finished, #iter = 290
obj = -1.513555, rho = -0.400913
nSV = 371, nBSV = 353
*
optimization finished, #iter = 518
obj = -4.849325, rho = -0.339949
nSV = 602, nBSV = 602
*
optimization finished, #iter = 511
obj = -4.865294, rho = -0.342652
nSV = 600, nBSV = 600
*
optimization finished, #iter = 268
obj = -1.437815, rho = -0.398990
nSV = 351, nBSV = 334
*
optimization finished, #iter = 524
obj = -4.891607, rho = -0.338557
nSV = 605, nBSV = 603
*
optimization finished, #iter = 279
obj = -1.458444, rho = -0.398487
nSV = 351, nBSV = 334
*
optimization finished, #iter = 351
obj = -1.077935, rho = -0.360476
nSV = 290, nBSV = 245
*
optimization finished, #iter = 269
obj = -1.460408, rho = -0.397505
nSV = 358, nBSV = 342
*
optimization finished, #iter = 280
obj = -1.427558, rho = -0.399358
nSV = 345, nBSV = 326
*
optimization finished, #iter = 264
obj = -2.240030, rho = -0.443600
nSV = 420, nBSV = 413
*
optimization finished, #iter = 244
obj = -1.464350, rho = -0.396342
nSV = 349, nBSV = 334
*
optimization finished, #iter = 330
obj = -1.063557, rho = -0.359485
nSV = 290, nBSV = 244
*
optimization finished, #iter = 264
obj = -1.510425, rho = -0.397814
nSV = 361, nBSV = 346
*
optimization finished, #iter = 411
obj = -1.081737, rho = -0.361101
nSV = 284, nBSV = 240
*
optimization finished, #iter = 354
obj = -1.089235, rho = -0.360696
nSV = 292, nBSV = 255
*
optimization finished, #iter = 340
obj = -1.071645, rho = -0.360787
nSV = 281, nBSV = 241
*
optimization finished, #iter = 278
obj = -1.443044, rho = -0.401066
nSV = 352, nBSV = 335
*
optimization finished, #iter = 340
obj = -1.093079, rho = -0.359516
nSV = 292, nBSV = 250
*
optimization finished, #iter = 276
obj = -1.459918, rho = -0.396746
nSV = 354, nBSV = 336
*
optimization finished, #iter = 497
obj = -0.983314, rho = -0.367503
nSV = 296, nBSV = 174
*
optimization finished, #iter = 482
obj = -0.965048, rho = -0.366625
nSV = 293, nBSV = 171
*
optimization finished, #iter = 341
obj = -1.113406, rho = -0.359515
nSV = 293, nBSV = 249
*
optimization finished, #iter = 343
obj = -1.067224, rho = -0.359857
nSV = 285, nBSV = 242
*
optimization finished, #iter = 342
obj = -1.063808, rho = -0.359925
nSV = 285, nBSV = 239
*
optimization finished, #iter = 381
obj = -1.067656, rho = -0.358636
nSV = 290, nBSV = 246
*
optimization finished, #iter = 495
obj = -0.995497, rho = -0.367775
nSV = 298, nBSV = 171
*
optimization finished, #iter = 649
obj = -1.140802, rho = -0.371631
nSV = 339, nBSV = 127
*
optimization finished, #iter = 316
obj = -23.293925, rho = -0.648578
nSV = 427, nBSV = 421
*
optimization finished, #iter = 541
obj = -0.986870, rho = -0.367572
nSV = 305, nBSV = 175
*
optimization finished, #iter = 716
obj = -1.138343, rho = -0.372666
nSV = 345, nBSV = 132
*
optimization finished, #iter = 475
obj = -1.008818, rho = -0.367500
nSV = 300, nBSV = 179
*
optimization finished, #iter = 295
obj = -22.093758, rho = -0.655216
nSV = 414, nBSV = 408
*
optimization finished, #iter = 312
obj = -22.109594, rho = -0.652791
nSV = 410, nBSV = 404
*
optimization finished, #iter = 377
obj = -15.263912, rho = -0.659622
nSV = 370, nBSV = 358
*
optimization finished, #iter = 478
obj = -0.984249, rho = -0.365942
nSV = 297, nBSV = 174
*
optimization finished, #iter = 362
obj = -15.060874, rho = -0.671414
nSV = 376, nBSV = 367
*
optimization finished, #iter = 489
obj = -0.993953, rho = -0.367436
nSV = 299, nBSV = 173
*
optimization finished, #iter = 531
obj = -0.984332, rho = -0.367767
*nSV = 297, nBSV = 167

optimization finished, #iter = 304
obj = -22.745007, rho = -0.647209
nSV = 422, nBSV = 416
*
optimization finished, #iter = 718
obj = -1.131764, rho = -0.371926
nSV = 337, nBSV = 126
*
optimization finished, #iter = 637
obj = -1.139698, rho = -0.370886
nSV = 327, nBSV = 127
*
optimization finished, #iter = 313
obj = -22.494278, rho = -0.645674
nSV = 420, nBSV = 412
*
optimization finished, #iter = 312
obj = -23.211450, rho = -0.659395
nSV = 427, nBSV = 421
*
optimization finished, #iter = 372
obj = -15.156937, rho = -0.657676
nSV = 376, nBSV = 363
*
optimization finished, #iter = 508
obj = -0.981855, rho = -0.366995
nSV = 296, nBSV = 171
*
optimization finished, #iter = 314
obj = -22.482080, rho = -0.648772
nSV = 410, nBSV = 404
*
optimization finished, #iter = 535
obj = -0.991780, rho = -0.365616
nSV = 301, nBSV = 170
*
optimization finished, #iter = 671
obj = -1.124870, rho = -0.372196
nSV = 333, nBSV = 127
*
optimization finished, #iter = 385
obj = -15.075611, rho = -0.664930
nSV = 374, nBSV = 363
*
optimization finished, #iter = 666
obj = -1.112458, rho = -0.371123
nSV = 333, nBSV = 129
*
optimization finished, #iter = 294
obj = -22.463901, rho = -0.654999
nSV = 414, nBSV = 408
*
optimization finished, #iter = 721
obj = -1.148453, rho = -0.372086
nSV = 342, nBSV = 126
**
optimization finished, #iter = 349

optimization finished, #iter = 403
obj = -15.748325, rho = -0.669452
obj = -14.972779, rho = -0.688794
nSV = 388, nBSV = 378
nSV = 384, nBSV = 369
*
optimization finished, #iter = 289
obj = -22.494887, rho = -0.609187
nSV = 416, nBSV = 409
**
optimization finished, #iter = 359
obj = -14.797170, rho = -0.672167
nSV = 375, nBSV = 361

optimization finished, #iter = 637
obj = -1.124583, rho = -0.372161
nSV = 340, nBSV = 135
*
optimization finished, #iter = 708
obj = -10.893092, rho = -0.411110
nSV = 310, nBSV = 289
*
optimization finished, #iter = 655
obj = -1.149857, rho = -0.372222
nSV = 339, nBSV = 133
*
optimization finished, #iter = 313
obj = -22.432615, rho = -0.637931
nSV = 420, nBSV = 412
*
optimization finished, #iter = 393
obj = -15.339299, rho = -0.642415
nSV = 375, nBSV = 362
*
optimization finished, #iter = 540
obj = -10.527128, rho = -0.414038
nSV = 297, nBSV = 273
**
optimization finished, #iter = 377
obj = -15.774116, rho = -0.665706
nSV = 383, nBSV = 371

optimization finished, #iter = 685
obj = -1.122288, rho = -0.371004
nSV = 336, nBSV = 123
*
optimization finished, #iter = 592
obj = -10.423611, rho = -0.414606
nSV = 285, nBSV = 268
*
optimization finished, #iter = 390
obj = -15.045847, rho = -0.688430
nSV = 384, nBSV = 372
*
optimization finished, #iter = 699
obj = -10.788471, rho = -0.420705
nSV = 305, nBSV = 283
*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1723
obj = -6.125348, rho = -0.368290
nSV = 260, nBSV = 189
*.*
optimization finished, #iter = 667
obj = -11.091288, rho = -0.413109
nSV = 307, nBSV = 285
.*
optimization finished, #iter = 2090
obj = -2.725640, rho = -0.369352
nSV = 255, nBSV = 85
*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1345
obj = -5.957202, rho = -0.370751
nSV = 247, nBSV = 186
*.*.*
optimization finished, #iter = 1812
obj = -2.837166, rho = -0.370277
nSV = 249, nBSV = 85
*
optimization finished, #iter = 605
obj = -10.450417, rho = -0.409345
nSV = 293, nBSV = 273
*.*
optimization finished, #iter = 1651
obj = -2.011133, rho = -0.373328
nSV = 289, nBSV = 32
*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1788
obj = -2.958236, rho = -0.371088
nSV = 259, nBSV = 94
*
optimization finished, #iter = 666
obj = -10.791890, rho = -0.410281
nSV = 294, nBSV = 270
.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1997
obj = -1.973988, rho = -0.375253
nSV = 294, nBSV = 39
*
optimization finished, #iter = 573
obj = -10.722619, rho = -0.411062
nSV = 300, nBSV = 273
*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1363
obj = -6.071466, rho = -0.366508
nSV = 252, nBSV = 190
*.*.*
optimization finished, #iter = 1572
obj = -5.956591, rho = -0.369005
nSV = 256, nBSV = 189
*..*
optimization finished, #iter = 2048
obj = -2.748981, rho = -0.373672
nSV = 270, nBSV = 92
*.*
optimization finished, #iter = 1334
obj = -5.997140, rho = -0.369017
nSV = 255, nBSV = 191
*.*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1885
obj = -2.825156, rho = -0.370093
nSV = 260, nBSV = 90
*
optimization finished, #iter = 1575
obj = -2.012971, rho = -0.373887
nSV = 297, nBSV = 35
*
optimization finished, #iter = 639
obj = -10.605243, rho = -0.412103
nSV = 295, nBSV = 277
*
optimization finished, #iter = 651
obj = -10.599453, rho = -0.412135
nSV = 300, nBSV = 276
*.*
optimization finished, #iter = 1732
obj = -2.824267, rho = -0.370911
nSV = 252, nBSV = 85
*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1274
obj = -6.046943, rho = -0.373431
nSV = 253, nBSV = 192
*.*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1563
obj = -5.954101, rho = -0.366737
nSV = 250, nBSV = 188
*
optimization finished, #iter = 1670
obj = -2.066341, rho = -0.373912
nSV = 292, nBSV = 43
*.*.*
optimization finished, #iter = 1920
obj = -2.920294, rho = -0.371329
nSV = 259, nBSV = 94
*.*.*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1654
obj = -6.189725, rho = -0.373729
nSV = 263, nBSV = 197
*
optimization finished, #iter = 533
obj = -172.721975, rho = -1.649370
nSV = 422, nBSV = 414
*
optimization finished, #iter = 1794
obj = -2.077562, rho = -0.375049
nSV = 295, nBSV = 42

Warning: using -h 0 may be faster
*
optimization finished, #iter = 1826
obj = -2.811957, rho = -0.369939
nSV = 256, nBSV = 87
*
optimization finished, #iter = 612
obj = -164.260230, rho = -1.703016
nSV = 412, nBSV = 403
*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1429
obj = -5.957973, rho = -0.372260
nSV = 251, nBSV = 185
*.*
optimization finished, #iter = 1767
obj = -1.941010, rho = -0.374525
nSV = 288, nBSV = 34
*.*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1592
obj = -6.285300, rho = -0.373209
nSV = 257, nBSV = 192
*
optimization finished, #iter = 1935
obj = -2.085495, rho = -0.374843
nSV = 295, nBSV = 38
*
optimization finished, #iter = 551
obj = -168.275668, rho = -1.615648
nSV = 416, nBSV = 407
*
optimization finished, #iter = 551
obj = -162.353098, rho = -1.614624
nSV = 408, nBSV = 399
*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1866
obj = -2.878577, rho = -0.368451
nSV = 257, nBSV = 91
*.*
optimization finished, #iter = 1600
obj = -2.731404, rho = -0.369501
nSV = 239, nBSV = 81
*
optimization finished, #iter = 532
obj = -172.975840, rho = -1.648652
nSV = 422, nBSV = 414
*.*
optimization finished, #iter = 1905
obj = -1.975816, rho = -0.374237
nSV = 295, nBSV = 42
*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1038
obj = -124.709920, rho = -1.068489
nSV = 329, nBSV = 315
*.*
optimization finished, #iter = 1735
obj = -1.990729, rho = -0.376542
nSV = 304, nBSV = 39
*.*
optimization finished, #iter = 586
obj = -165.604000, rho = -1.685946
nSV = 423, nBSV = 413

Warning: using -h 0 may be faster
*
optimization finished, #iter = 1757
obj = -2.025945, rho = -0.374225
nSV = 295, nBSV = 41
*
optimization finished, #iter = 542
obj = -166.713643, rho = -1.557494
nSV = 401, nBSV = 394
*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1174
obj = -123.846364, rho = -1.042538
nSV = 338, nBSV = 325
*
optimization finished, #iter = 450
obj = -164.728115, rho = -1.667654
nSV = 409, nBSV = 400
....
Warning: using -h 0 may be faster
*.*
optimization finished, #iter = 4364
obj = -86.869363, rho = -0.441592
nSV = 282, nBSV = 252
.*.
Warning: using -h 0 may be faster
*..
Warning: using -h 0 may be faster
*.
optimization finished, #iter = 1223
obj = -123.615367, rho = -1.068294
nSV = 334, nBSV = 320

Warning: using -h 0 may be faster
*
optimization finished, #iter = 4916
obj = -85.823311, rho = -0.440036
nSV = 275, nBSV = 246
..
Warning: using -h 0 may be faster
*....*
optimization finished, #iter = 5203
obj = -85.351369, rho = -0.454288
nSV = 277, nBSV = 243

Warning: using -h 0 may be faster
*
optimization finished, #iter = 1508
obj = -128.585127, rho = -1.106218
nSV = 352, nBSV = 335
*
optimization finished, #iter = 522
obj = -165.533517, rho = -1.470837
nSV = 407, nBSV = 400
.*
optimization finished, #iter = 546
obj = -165.191682, rho = -1.593941
nSV = 407, nBSV = 398
.*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1426
obj = -123.406957, rho = -1.087858
nSV = 338, nBSV = 324
....
Warning: using -h 0 may be faster
*........*.*
optimization finished, #iter = 11658
obj = -39.209082, rho = -0.355525
nSV = 250, nBSV = 163
..
Warning: using -h 0 may be faster
*.....*
optimization finished, #iter = 10158
obj = -38.811405, rho = -0.352493
nSV = 243, nBSV = 159
*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1153
obj = -125.105323, rho = -1.029168
nSV = 336, nBSV = 320
.*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1418
obj = -121.675492, rho = -1.094662
nSV = 330, nBSV = 313
....
Warning: using -h 0 may be faster
.*.
Warning: using -h 0 may be faster
*...
Warning: using -h 0 may be faster
*
optimization finished, #iter = 5259
obj = -90.548363, rho = -0.442002
nSV = 283, nBSV = 258

Warning: using -h 0 may be faster
*
optimization finished, #iter = 4909
obj = -87.713034, rho = -0.445217
nSV = 282, nBSV = 247
.....*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1269
obj = -120.165026, rho = -1.067118
nSV = 324, nBSV = 307
.
Warning: using -h 0 may be faster

Warning: using -h 0 may be faster
*.*.....
Warning: using -h 0 may be faster
*
optimization finished, #iter = 5051
obj = -84.157442, rho = -0.445494
nSV = 279, nBSV = 242
..
Warning: using -h 0 may be faster
*
optimization finished, #iter = 7765
obj = -9.321187, rho = -0.375904
nSV = 232, nBSV = 36
*......*.*
optimization finished, #iter = 9394
obj = -8.337857, rho = -0.377869
nSV = 244, nBSV = 33
..*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1355
obj = -129.034607, rho = -1.062316
nSV = 346, nBSV = 325
.........
Warning: using -h 0 may be faster
*..*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1259
obj = -121.816100, rho = -1.106802
nSV = 333, nBSV = 319
.
Warning: using -h 0 may be faster
.*......*.....*
optimization finished, #iter = 9629
obj = -38.972436, rho = -0.353851
nSV = 249, nBSV = 162
*
optimization finished, #iter = 11481
obj = -37.409831, rho = -0.353266
nSV = 255, nBSV = 159
....
Warning: using -h 0 may be faster
*....
Warning: using -h 0 may be faster
*
optimization finished, #iter = 5592
obj = -87.155684, rho = -0.453602
nSV = 274, nBSV = 246
.
Warning: using -h 0 may be faster
.*..
Warning: using -h 0 may be faster
*..
Warning: using -h 0 may be faster
*...
Warning: using -h 0 may be faster
*
optimization finished, #iter = 4436
obj = -88.068177, rho = -0.430494
nSV = 282, nBSV = 247
......*
optimization finished, #iter = 11020
obj = -38.197506, rho = -0.352704
nSV = 253, nBSV = 163
...
Warning: using -h 0 may be faster
.*..*.*
optimization finished, #iter = 9355
obj = -9.500683, rho = -0.376591
nSV = 246, nBSV = 37
.......
Warning: using -h 0 may be faster
*.*.*.....*
optimization finished, #iter = 8402
obj = -8.622574, rho = -0.376536
nSV = 242, nBSV = 34
*
optimization finished, #iter = 4660
obj = -3.944361, rho = -0.373740
nSV = 292, nBSV = 7
.
Warning: using -h 0 may be faster
.*....*
optimization finished, #iter = 9419
obj = -8.643499, rho = -0.377636
nSV = 247, nBSV = 34
*
optimization finished, #iter = 3304
obj = -2.839366, rho = -0.374953
nSV = 272, nBSV = 4
.......
Warning: using -h 0 may be faster
.*....
Warning: using -h 0 may be faster
**.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 4145
obj = -85.400034, rho = -0.429371
nSV = 274, nBSV = 241
....*
optimization finished, #iter = 9783
obj = -37.636531, rho = -0.356508
nSV = 253, nBSV = 158
...
Warning: using -h 0 may be faster
*...........*
optimization finished, #iter = 9758
obj = -39.395585, rho = -0.364586
nSV = 251, nBSV = 165

Warning: using -h 0 may be faster
*.
Warning: using -h 0 may be faster

Warning: using -h 0 may be faster
*.
Warning: using -h 0 may be faster
*..*......*
optimization finished, #iter = 9799
obj = -39.578521, rho = -0.363551
nSV = 254, nBSV = 166
....*
optimization finished, #iter = 4639
obj = -3.874577, rho = -0.374188
nSV = 286, nBSV = 5
*
optimization finished, #iter = 3940
obj = -3.947793, rho = -0.373371
nSV = 284, nBSV = 6
.*.*
optimization finished, #iter = 8872
obj = -8.866273, rho = -0.377112
nSV = 242, nBSV = 33
...
Warning: using -h 0 may be faster
*.......
Warning: using -h 0 may be faster

Warning: using -h 0 may be faster
*.
optimization finished, #iter = 1417
obj = -781.816868, rho = -3.120530
nSV = 414, nBSV = 402
*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 3813
obj = -86.170624, rho = -0.446993
nSV = 277, nBSV = 246
*
optimization finished, #iter = 9166
obj = -8.816787, rho = -0.372518
nSV = 248, nBSV = 32
..*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1583
obj = -742.809936, rho = -3.232852
nSV = 404, nBSV = 392
........
Warning: using -h 0 may be faster

Warning: using -h 0 may be faster
*.*.
Warning: using -h 0 may be faster
*..*.....*
optimization finished, #iter = 3633
obj = -4.017472, rho = -0.374594
nSV = 280, nBSV = 7
.*.
optimization finished, #iter = 3619
obj = -3.479355, rho = -0.374553
nSV = 284, nBSV = 4
.*.
optimization finished, #iter = 4031
obj = -4.022281, rho = -0.374554
nSV = 278, nBSV = 6
*.*
optimization finished, #iter = 9508
obj = -8.171188, rho = -0.374954
nSV = 249, nBSV = 30
.
Warning: using -h 0 may be faster
.*...*.
Warning: using -h 0 may be faster

Warning: using -h 0 may be faster
**
optimization finished, #iter = 1418
obj = -733.123433, rho = -3.059735
nSV = 399, nBSV = 388

optimization finished, #iter = 1121
.obj = -761.027911, rho = -2.944929
nSV = 400, nBSV = 386
*.*
optimization finished, #iter = 8468
obj = -40.794499, rho = -0.357721
nSV = 252, nBSV = 164
......
Warning: using -h 0 may be faster
*.
Warning: using -h 0 may be faster
*..
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1228
..obj = -782.964564, rho = -3.125813
nSV = 405, nBSV = 393
.*
optimization finished, #iter = 8485
obj = -37.855556, rho = -0.359777
nSV = 244, nBSV = 153
..*
optimization finished, #iter = 7799
obj = -8.834722, rho = -0.374902
nSV = 237, nBSV = 35
.
Warning: using -h 0 may be faster
.*..*
optimization finished, #iter = 3315
obj = -3.829564, rho = -0.376261
nSV = 293, nBSV = 6
.
Warning: using -h 0 may be faster
*....*
optimization finished, #iter = 4750
obj = -3.733763, rho = -0.374582
nSV = 279, nBSV = 6

Warning: using -h 0 may be faster
.*...*
optimization finished, #iter = 3486
obj = -3.979689, rho = -0.373805
nSV = 295, nBSV = 6
..
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1232
obj = -756.635008, rho = -3.025359
nSV = 395, nBSV = 383
.
Warning: using -h 0 may be faster
**.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1286
obj = -746.657563, rho = -3.254865
nSV = 408, nBSV = 394
..
Warning: using -h 0 may be faster
*..
Warning: using -h 0 may be faster
*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 4797
obj = -564.576921, rho = -1.530398
nSV = 319, nBSV = 299
..*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1472
obj = -745.589069, rho = -3.047800
nSV = 403, nBSV = 392
.*
optimization finished, #iter = 8465
obj = -9.159835, rho = -0.373104
nSV = 240, nBSV = 37
....
Warning: using -h 0 may be faster
*
optimization finished, #iter = 2926
obj = -565.211895, rho = -1.489009
nSV = 326, nBSV = 306
......
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1390
obj = -750.957974, rho = -2.833709
nSV = 401, nBSV = 389
..
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1440
obj = -747.132705, rho = -3.066507
nSV = 400, nBSV = 386
....
Warning: using -h 0 may be faster
*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 4836
obj = -579.778145, rho = -1.525225
nSV = 338, nBSV = 316
...
Warning: using -h 0 may be faster
*...
Warning: using -h 0 may be faster
*
optimization finished, #iter = 4046
obj = -557.251350, rho = -1.494045
nSV = 320, nBSV = 302
...........
Warning: using -h 0 may be faster
...*.........
Warning: using -h 0 may be faster
.*
optimization finished, #iter = 17921
obj = -381.228061, rho = -0.377898
nSV = 278, nBSV = 235
*........*..*
optimization finished, #iter = 15437
obj = -379.602299, rho = -0.409936
nSV = 268, nBSV = 228
...
Warning: using -h 0 may be faster
*..........*
optimization finished, #iter = 16001
obj = -380.134737, rho = -0.369709
nSV = 267, nBSV = 226
................
Warning: using -h 0 may be faster
.*.....
Warning: using -h 0 may be faster
*..
Warning: using -h 0 may be faster
*
optimization finished, #iter = 3967
obj = -549.478186, rho = -1.529538
nSV = 314, nBSV = 295
........
Warning: using -h 0 may be faster
*.....
Warning: using -h 0 may be faster
*
optimization finished, #iter = 4917
obj = -565.779132, rho = -1.504186
nSV = 326, nBSV = 305
.....*........*....................*....*
optimization finished, #iter = 17430
obj = -17.818197, rho = -0.375401
nSV = 245, nBSV = 13
..........*......
Warning: using -h 0 may be faster
.....*
optimization finished, #iter = 17633
obj = -20.013562, rho = -0.376532
nSV = 238, nBSV = 15
.....
Warning: using -h 0 may be faster
*.*..
Warning: using -h 0 may be faster
.*
optimization finished, #iter = 3672
obj = -557.243302, rho = -1.479429
nSV = 319, nBSV = 300
.*
optimization finished, #iter = 51272
obj = -147.154800, rho = -0.324166
nSV = 242, nBSV = 133
.......*.......................
Warning: using -h 0 may be faster
*
optimization finished, #iter = 3894
obj = -583.265444, rho = -1.485609
nSV = 326, nBSV = 307
.........
Warning: using -h 0 may be faster
*.............*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 17624
obj = -387.508775, rho = -0.411724
nSV = 273, nBSV = 232
..................
Warning: using -h 0 may be faster
.*........*.....
Warning: using -h 0 may be faster
.*...*
optimization finished, #iter = 52273
.obj = -151.360961, rho = -0.332503
nSV = 245, nBSV = 138
.....
Warning: using -h 0 may be faster
.....*
optimization finished, #iter = 18236
obj = -370.671603, rho = -0.401884
nSV = 270, nBSV = 230
......*.*
optimization finished, #iter = 18735

Warning: using -h 0 may be faster
obj = -398.666117, rho = -0.399230
nSV = 274, nBSV = 239
*..
Warning: using -h 0 may be faster
*
optimization finished, #iter = 4454
obj = -541.985879, rho = -1.480543
nSV = 308, nBSV = 289
.............*..............
Warning: using -h 0 may be faster
*.*..*
optimization finished, #iter = 20021
obj = -20.273042, rho = -0.376711
nSV = 244, nBSV = 13
...............
Warning: using -h 0 may be faster
..*..*..*
optimization finished, #iter = 22004
obj = -18.529336, rho = -0.376776
nSV = 238, nBSV = 11
..*..
Warning: using -h 0 may be faster
*
optimization finished, #iter = 17577
obj = -17.836657, rho = -0.379399
nSV = 231, nBSV = 10
.....*
optimization finished, #iter = 6761
obj = -8.211409, rho = -0.375379
nSV = 294, nBSV = 3
.........
Warning: using -h 0 may be faster
*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 3132
obj = -550.984679, rho = -1.527228
nSV = 317, nBSV = 297
.........................
Warning: using -h 0 may be faster
..*...............
Warning: using -h 0 may be faster
.*.*.....*
optimization finished, #iter = 15633
obj = -381.250157, rho = -0.410354
nSV = 267, nBSV = 228
..........
Warning: using -h 0 may be faster
*...................*
optimization finished, #iter = 18004
obj = -391.164242, rho = -0.386823
nSV = 280, nBSV = 240
.....................*
optimization finished, #iter = 6100
obj = -8.104388, rho = -0.375950
nSV = 285, nBSV = 3
....*..*..
Warning: using -h 0 may be faster
.*
optimization finished, #iter = 50004
.obj = -140.620124, rho = -0.332156
nSV = 250, nBSV = 127
........*...*.........
Warning: using -h 0 may be faster
..*.......*......*
optimization finished, #iter = 4343
obj = -4.238892, rho = -0.375709
nSV = 272, nBSV = 1
....*........*...............*.
optimization finished, #iter = 19387
obj = -19.453774, rho = -0.375698
nSV = 240, nBSV = 11
....*..............*
optimization finished, #iter = 22040
obj = -19.553129, rho = -0.378863
nSV = 234, nBSV = 12
...........*..*..
Warning: using -h 0 may be faster
...*.*
optimization finished, #iter = 44499
obj = -148.653846, rho = -0.318207
nSV = 243, nBSV = 125
........*.
optimization finished, #iter = 6550
obj = -8.275712, rho = -0.376678
nSV = 291, nBSV = 4
........................
Warning: using -h 0 may be faster
..*.*......*
optimization finished, #iter = 54505
obj = -143.886009, rho = -0.329159
nSV = 250, nBSV = 132
..*.*
optimization finished, #iter = 5210
obj = -8.253266, rho = -0.375243
nSV = 289, nBSV = 4
.....*.......
Warning: using -h 0 may be faster
*.*
optimization finished, #iter = 18561
obj = -14.031212, rho = -0.375167
nSV = 235, nBSV = 7
....
Warning: using -h 0 may be faster
...*..*....
Warning: using -h 0 may be faster
*
optimization finished, #iter = 17814
obj = -379.894165, rho = -0.360453
nSV = 270, nBSV = 234
............*
optimization finished, #iter = 6300
obj = -6.403978, rho = -0.374363
nSV = 286, nBSV = 2
*..........
Warning: using -h 0 may be faster
*..........*
optimization finished, #iter = 15226
obj = -378.120639, rho = -0.355292
nSV = 271, nBSV = 230
................
Warning: using -h 0 may be faster
..*............*..*
optimization finished, #iter = 19786
obj = -18.957277, rho = -0.373519
nSV = 230, nBSV = 12
*.......
Warning: using -h 0 may be faster
.*
optimization finished, #iter = 6573
obj = -7.735303, rho = -0.377177
nSV = 286, nBSV = 4

Warning: using -h 0 may be faster
*
optimization finished, #iter = 2271
obj = -1496.878480, rho = -3.267900
nSV = 395, nBSV = 382
*..................*
optimization finished, #iter = 6793
obj = -8.225162, rho = -0.376974
nSV = 292, nBSV = 4
.......................*..
Warning: using -h 0 may be faster

Warning: using -h 0 may be faster
*
optimization finished, #iter = 49164
obj = -145.198444, rho = -0.328296
nSV = 237, nBSV = 127
.*..*.
optimization finished, #iter = 5062
obj = -8.012554, rho = -0.377944
nSV = 294, nBSV = 4
.........
Warning: using -h 0 may be faster
*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 2199
.obj = -1419.671101, rho = -3.426234
nSV = 383, nBSV = 370
..*.*............
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1862
obj = -1456.486637, rho = -3.146674
nSV = 377, nBSV = 363
......................*
optimization finished, #iter = 46877
obj = -152.773153, rho = -0.346553
nSV = 248, nBSV = 137
..*.
Warning: using -h 0 may be faster
*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 2125
obj = -1401.882575, rho = -3.328296
nSV = 381, nBSV = 367
*
optimization finished, #iter = 18176
obj = -19.667910, rho = -0.372951
nSV = 232, nBSV = 12
...........
Warning: using -h 0 may be faster
*..........
Warning: using -h 0 may be faster
*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 2127
obj = -1500.364701, rho = -3.207050
nSV = 390, nBSV = 375
.........*....*........
Warning: using -h 0 may be faster
*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 2145
obj = -1448.364398, rho = -3.210599
nSV = 373, nBSV = 359
..........*.
optimization finished, #iter = 11662
obj = -7.770484, rho = -0.375078
nSV = 290, nBSV = 2
....
Warning: using -h 0 may be faster
*...*
optimization finished, #iter = 8861
obj = -1093.315966, rho = -1.489447
nSV = 325, nBSV = 305
..*..
Warning: using -h 0 may be faster
*...
Warning: using -h 0 may be faster
*
optimization finished, #iter = 7436
obj = -1091.332876, rho = -1.558007
nSV = 313, nBSV = 290
....*
optimization finished, #iter = 43264
obj = -148.078302, rho = -0.342942
nSV = 245, nBSV = 132

Warning: using -h 0 may be faster
*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 2348
obj = -1426.580234, rho = -3.433941
nSV = 385, nBSV = 373
.....
Warning: using -h 0 may be faster
*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 2065
obj = -1427.837573, rho = -3.363091
nSV = 382, nBSV = 368
......
Warning: using -h 0 may be faster
*.
Warning: using -h 0 may be faster
.*
optimization finished, #iter = 2217
.obj = -1438.211060, rho = -3.174569
nSV = 380, nBSV = 364
............
Warning: using -h 0 may be faster
*.....
Warning: using -h 0 may be faster
*
optimization finished, #iter = 8257
obj = -1077.019569, rho = -1.481852
nSV = 311, nBSV = 293
...............*..............
Warning: using -h 0 may be faster
*..
Warning: using -h 0 may be faster
*...
Warning: using -h 0 may be faster
*
optimization finished, #iter = 8140
.obj = -1116.937668, rho = -1.501907
nSV = 332, nBSV = 312
*
optimization finished, #iter = 7004
.obj = -1076.212622, rho = -1.468752
nSV = 317, nBSV = 296
..
Warning: using -h 0 may be faster
*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 2385
obj = -1429.693474, rho = -3.313559
nSV = 380, nBSV = 364
.
Warning: using -h 0 may be faster
.*......................*.
Warning: using -h 0 may be faster
.*
optimization finished, #iter = 33690
obj = -727.220429, rho = -0.309744
nSV = 273, nBSV = 236
..............*........*
optimization finished, #iter = 48194
obj = -145.895064, rho = -0.356052
nSV = 244, nBSV = 129
........................*....................................
Warning: using -h 0 may be faster
*...
Warning: using -h 0 may be faster
.*
optimization finished, #iter = 7271
obj = -1048.833270, rho = -1.474960
nSV = 302, nBSV = 284
...........
Warning: using -h 0 may be faster
*......
Warning: using -h 0 may be faster
.*
optimization finished, #iter = 7365
obj = -1062.041219, rho = -1.560079
nSV = 308, nBSV = 290
.......
Warning: using -h 0 may be faster
*.*...*
optimization finished, #iter = 47039
.obj = -154.752861, rho = -0.332239
nSV = 254, nBSV = 142
..
Warning: using -h 0 may be faster
*..............*.....*
optimization finished, #iter = 28058
obj = -726.522959, rho = -0.215324
nSV = 264, nBSV = 225
.
Warning: using -h 0 may be faster
*..*
optimization finished, #iter = 6233
obj = -1092.614605, rho = -1.463152
nSV = 316, nBSV = 295
.............*..*
optimization finished, #iter = 36135
obj = -725.272862, rho = -0.380602
nSV = 262, nBSV = 224
........................*.......................................................*.................*
optimization finished, #iter = 24622
obj = -25.535165, rho = -0.377271
nSV = 242, nBSV = 7
....*..
Warning: using -h 0 may be faster
.*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 6216
obj = -1063.622422, rho = -1.537629
nSV = 316, nBSV = 293
................*.......*.....
Warning: using -h 0 may be faster
*......
Warning: using -h 0 may be faster
*.
optimization finished, #iter = 7169
.obj = -1125.857431, rho = -1.476289
nSV = 320, nBSV = 301
...............................
Warning: using -h 0 may be faster
*...............................
Warning: using -h 0 may be faster
....*...*
optimization finished, #iter = 36848
obj = -740.576317, rho = -0.330154
nSV = 265, nBSV = 230
......*................*..........*...........*
optimization finished, #iter = 21983
.obj = -28.146993, rho = -0.378876
nSV = 243, nBSV = 8
......*
optimization finished, #iter = 87737
obj = -269.911009, rho = -0.352734
nSV = 250, nBSV = 129
..........*.....*....*
optimization finished, #iter = 82787
obj = -262.226135, rho = -0.333049
nSV = 238, nBSV = 117
.....................*..*
optimization finished, #iter = 27006
obj = -29.103188, rho = -0.377792
nSV = 245, nBSV = 9
..........................*...*...........................................*.......
Warning: using -h 0 may be faster
..*.......................*.*.
optimization finished, #iter = 34080
obj = -708.306754, rho = -0.325082
nSV = 268, nBSV = 225
*.*...*...
Warning: using -h 0 may be faster
*
optimization finished, #iter = 25385
obj = -28.286967, rho = -0.376228
nSV = 240, nBSV = 9
......*
optimization finished, #iter = 22880
obj = -26.767087, rho = -0.379929
nSV = 243, nBSV = 8
......
Warning: using -h 0 may be faster
*.............................*
optimization finished, #iter = 38192
*obj = -760.301765, rho = -0.347091
nSV = 272, nBSV = 234
......
Warning: using -h 0 may be faster
..*
optimization finished, #iter = 24052
obj = -29.966073, rho = -0.379010
nSV = 236, nBSV = 8
.........................*..*............
Warning: using -h 0 may be faster
*................*...*
optimization finished, #iter = 28597
obj = -18.576622, rho = -0.375367
nSV = 229, nBSV = 5
....*.*
optimization finished, #iter = 38049
obj = -724.243019, rho = -0.307888
nSV = 260, nBSV = 217
................................*..
Warning: using -h 0 may be faster
...*
optimization finished, #iter = 26957
obj = -29.154822, rho = -0.375971
nSV = 231, nBSV = 8
.....................*.....
Warning: using -h 0 may be faster
..*.............................
Warning: using -h 0 may be faster
....**...*.
optimization finished, #iter = 7856
obj = -8.692063, rho = -0.375289
nSV = 290, nBSV = 2
..*..........*.....................*
optimization finished, #iter = 6789
obj = -11.322358, rho = -0.379091
nSV = 295, nBSV = 3
..................*...*................*
optimization finished, #iter = 80036
obj = -265.990225, rho = -0.325770
nSV = 237, nBSV = 117
.............*.....
Warning: using -h 0 may be faster
.*
optimization finished, #iter = 25851
obj = -29.674430, rho = -0.380024
nSV = 226, nBSV = 8
.............*....*
optimization finished, #iter = 27559
obj = -30.259796, rho = -0.378443
.nSV = 246, nBSV = 9
.................................
Warning: using -h 0 may be faster
.*.*......
Warning: using -h 0 may be faster
..*.................*......*
optimization finished, #iter = 7025
obj = -11.592746, rho = -0.376716
nSV = 287, nBSV = 4
...*.
optimization finished, #iter = 93424
obj = -250.642086, rho = -0.341229
nSV = 229, nBSV = 119
.............................
Warning: using -h 0 may be faster
..
Warning: using -h 0 may be faster
*.*.
Warning: using -h 0 may be faster
.......*.............*
optimization finished, #iter = 4332
.obj = -5.361557, rho = -0.376385
nSV = 268, nBSV = 1
....*.*...*
optimization finished, #iter = 36080
obj = -748.777770, rho = -0.273011
nSV = 277, nBSV = 235
....*
optimization finished, #iter = 18456
obj = -11.715874, rho = -0.377330
nSV = 290, nBSV = 2
........*.*
optimization finished, #iter = 9816
obj = -10.860997, rho = -0.377491
nSV = 290, nBSV = 3
.........*......*..................
Warning: using -h 0 may be faster
*.....................*
optimization finished, #iter = 33750
obj = -723.344636, rho = -0.260715
nSV = 268, nBSV = 230
..........*.......*.................*.
optimization finished, #iter = 15420
obj = -11.715147, rho = -0.375906
nSV = 286, nBSV = 2
..........
Warning: using -h 0 may be faster
......*.....*
optimization finished, #iter = 14759
obj = -11.727731, rho = -0.375397
nSV = 282, nBSV = 2
........................*...
Warning: using -h 0 may be faster
*...*
optimization finished, #iter = 9832
obj = -11.675405, rho = -0.377594
nSV = 283, nBSV = 2
......*
optimization finished, #iter = 98437
obj = -254.159996, rho = -0.333277
nSV = 240, nBSV = 119
..................*...*
optimization finished, #iter = 16771
obj = -10.655115, rho = -0.375730
nSV = 292, nBSV = 2
.................
Warning: using -h 0 may be faster
*...................*
optimization finished, #iter = 31795
obj = -722.675491, rho = -0.254995
nSV = 266, nBSV = 223
.............................*........................................................*...*..................*
optimization finished, #iter = 79108
obj = -273.132858, rho = -0.356526
nSV = 247, nBSV = 124
................................*..................*
optimization finished, #iter = 89745
obj = -260.052190, rho = -0.339371
nSV = 241, nBSV = 120
....................*..........................................................*.....*
optimization finished, #iter = 87631
obj = -262.841829, rho = -0.344771
nSV = 245, nBSV = 131
...................................*..........................................................*.............*
optimization finished, #iter = 87592
obj = -262.461382, rho = -0.377731
nSV = 243, nBSV = 122
....*.................................................................*...*
optimization finished, #iter = 99830
obj = -274.677412, rho = -0.335479
nSV = 255, nBSV = 130
[LibSVM].......................*...................................*.............*
optimization finished, #iter = 70762
obj = -163.579109, rho = -0.339624
nSV = 264, nBSV = 151
Best parameters for RBF kernel: {'C': 50, 'gamma': 10}
Accuracy with the best RBF model: 88.75%
In [15]:
model = SVR(kernel = 'rbf',C=10,gamma=1)

# Train the model on the training data
model.fit(X_train, y_train)

# Make predictions on the testing data
y_pred = model.predict(X_test)

# Evaluate the model
test_mse = mean_squared_error(y_test, y_pred)
test_r2 = r2_score(y_test, y_pred)
print(f'Test Mean Squared Error: {test_mse}')
print(f'Test R-squared: {test_r2}')
Test Mean Squared Error: 0.0038995784534656534
Test R-squared: 0.8727087538040207
In [16]:
import matplotlib.pyplot as plt

# Plot actual vs predicted
plt.figure(figsize=(25, 12))  # Enlarge the plot
plt.plot(y_pred, color='blue', label='Predicted')
plt.plot(y_test, color='red', label='Actual')  # Plotting the line y=x for reference
plt.xlabel('Actual Values')
plt.ylabel('Predicted Values')
plt.title('Actual vs Predicted Values')
plt.legend()

# Set xlim and ylim
plt.xlim(0, 500)  
plt.ylim(0, 1) 
plt.show()
In [17]:
residuals = y_test - y_pred.flatten()
# Plot histogram of residuals
plt.figure(figsize=(10, 6))
plt.hist(residuals, bins=30, color='purple', alpha=0.7)
plt.title('Histogram of Residuals')
plt.xlabel('Residual')
plt.ylabel('Frequency')
plt.show()

Linear Model¶

In [18]:
target = 'mental_disorders'

# Model Building: SVR with RBF kernel
lm_model = LinearRegression()
lm_model.fit(X_train, y_train)

# Model Evaluation
y_pred = lm_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Mean Squared Error: {mse}')
print(f'R-squared: {r2}')

plt.figure(figsize=(25, 12))  
plt.plot(y_pred, color='blue', label='Predicted')
plt.plot(y_test, color='red', label='Actual')  # Plotting the line y=x for reference
plt.xlabel('Actual Values')
plt.ylabel('Predicted Values')
plt.title('Actual vs Predicted Values')
plt.legend()

# Plotting residuals
residuals = y_test - y_pred.flatten()
# Plot histogram of residuals
plt.figure(figsize=(10, 6))
plt.hist(residuals, bins=30, color='purple', alpha=0.7)
plt.title('Histogram of Residuals')
plt.xlabel('Residual')
plt.ylabel('Frequency')
plt.show()
Mean Squared Error: 0.003954179689872241
R-squared: 0.8709264433545774

Lstm¶

In [19]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Bidirectional, Dense, Dropout
from keras.callbacks import ModelCheckpoint, EarlyStopping,ReduceLROnPlateau
In [20]:
import numpy as np
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Bidirectional, Dense, Dropout

# Assuming merged_numeric_scaled is the standardized feature data
# Extract the target variable before performing PCA
target_column = 'mental_disorders'
y = merged_df[target_column].values
# Drop the target column from the feature data
X = merged_df.drop(columns=[target_column,'Entity','Code_x','Code_y']).values
# Perform PCA to reduce to 4 components
pca = PCA(n_components=4)
pca_out = pca.fit_transform(X)
# Function to create sequences
def create_sequences(data, target, n_timesteps):
    sequences = []
    targets = []
    for i in range(len(data) - n_timesteps + 1):
        seq = data[i:i + n_timesteps]
        sequences.append(seq)
        targets.append(target[i + n_timesteps - 1])
    return np.array(sequences), np.array(targets)
 
# Assuming each sample should be a sequence of 10 timesteps
n_timesteps = 10
n_features = pca_out.shape[1]
# Create sequences
X_seq, y_seq = create_sequences(pca_out, y, n_timesteps)
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.3, random_state=42)
# Print the number of features
print(n_features)
# Create the RNN model with Bidirectional LSTM layers for regression task
model2 = Sequential()
# Bidirectional LSTM layer with Dropout regularization
model2.add(Bidirectional(LSTM(units=64, return_sequences=True, input_shape=(n_timesteps, n_features))))
model2.add(Dropout(0.3))
# Second Bidirectional LSTM layer
model2.add(Bidirectional(LSTM(units=64, return_sequences=True)))
model2.add(Dropout(0.3))
# Third Bidirectional LSTM layer
model2.add(Bidirectional(LSTM(units=64)))
model2.add(Dropout(0.3))
# Output layer for regression task
model2.add(Dense(units=1))
# Compile the model with rmsprop optimizer
model2.compile(optimizer='adam', loss='mse', metrics=['mae'])
# Summary of the model
model2.summary()
# Early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
# Reduce learning rate on plateau callback
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.001)
# Train the model
history = model2.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test), callbacks=[early_stopping, reduce_lr])
# Plot training & validation metrics
plt.figure(figsize=(12, 6))
# Plot loss
plt.subplot(1, 2, 1)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
# Plot mean absolute error
plt.subplot(1, 2, 2)
plt.plot(history.history['mae'], label='Training MAE')
plt.plot(history.history['val_mae'], label='Validation MAE')
plt.title('Model Mean Absolute Error')
plt.xlabel('Epoch')
plt.ylabel('Mean Absolute Error')
plt.legend()
plt.show()
# Make predictions
y_pred = model2.predict(X_test)
# Plot actual vs. predicted values using a line plot
plt.figure(figsize=(10, 6))
plt.plot(range(len(y_test)), y_test, label='Actual Values', linestyle='-', marker='o')
plt.plot(range(len(y_test)), y_pred, label='Predicted Values', linestyle='-', marker='x')
plt.title('Actual vs. Predicted Values')
plt.xlabel('Sample Index')
plt.ylabel('Mental Disorders')
plt.legend()
plt.show()
4
/Applications/Anaconda/anaconda3/lib/python3.11/site-packages/keras/src/layers/rnn/rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(**kwargs)
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type)                    ┃ Output Shape           ┃       Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ bidirectional (Bidirectional)   │ ?                      │   0 (unbuilt) │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout (Dropout)               │ ?                      │   0 (unbuilt) │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ bidirectional_1 (Bidirectional) │ ?                      │   0 (unbuilt) │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout_1 (Dropout)             │ ?                      │   0 (unbuilt) │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ bidirectional_2 (Bidirectional) │ ?                      │   0 (unbuilt) │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout_2 (Dropout)             │ ?                      │   0 (unbuilt) │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense (Dense)                   │ ?                      │   0 (unbuilt) │
└─────────────────────────────────┴────────────────────────┴───────────────┘
 Total params: 0 (0.00 B)
 Trainable params: 0 (0.00 B)
 Non-trainable params: 0 (0.00 B)
Epoch 1/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 4s 16ms/step - loss: 0.0424 - mae: 0.1527 - val_loss: 0.0145 - val_mae: 0.0981 - learning_rate: 0.0010
Epoch 2/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0138 - mae: 0.0875 - val_loss: 0.0108 - val_mae: 0.0802 - learning_rate: 0.0010
Epoch 3/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0120 - mae: 0.0813 - val_loss: 0.0103 - val_mae: 0.0744 - learning_rate: 0.0010
Epoch 4/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 3s 21ms/step - loss: 0.0110 - mae: 0.0779 - val_loss: 0.0092 - val_mae: 0.0705 - learning_rate: 0.0010
Epoch 5/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 3s 22ms/step - loss: 0.0099 - mae: 0.0742 - val_loss: 0.0100 - val_mae: 0.0787 - learning_rate: 0.0010
Epoch 6/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 3s 22ms/step - loss: 0.0101 - mae: 0.0755 - val_loss: 0.0087 - val_mae: 0.0665 - learning_rate: 0.0010
Epoch 7/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 3s 22ms/step - loss: 0.0097 - mae: 0.0729 - val_loss: 0.0084 - val_mae: 0.0630 - learning_rate: 0.0010
Epoch 8/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 3s 22ms/step - loss: 0.0091 - mae: 0.0701 - val_loss: 0.0081 - val_mae: 0.0638 - learning_rate: 0.0010
Epoch 9/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 20ms/step - loss: 0.0089 - mae: 0.0708 - val_loss: 0.0092 - val_mae: 0.0724 - learning_rate: 0.0010
Epoch 10/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0092 - mae: 0.0712 - val_loss: 0.0081 - val_mae: 0.0621 - learning_rate: 0.0010
Epoch 11/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0087 - mae: 0.0688 - val_loss: 0.0082 - val_mae: 0.0637 - learning_rate: 0.0010
Epoch 12/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0084 - mae: 0.0681 - val_loss: 0.0082 - val_mae: 0.0669 - learning_rate: 0.0010
Epoch 13/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0086 - mae: 0.0693 - val_loss: 0.0080 - val_mae: 0.0629 - learning_rate: 0.0010
Epoch 14/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0088 - mae: 0.0685 - val_loss: 0.0078 - val_mae: 0.0640 - learning_rate: 0.0010
Epoch 15/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0085 - mae: 0.0670 - val_loss: 0.0080 - val_mae: 0.0661 - learning_rate: 0.0010
Epoch 16/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0084 - mae: 0.0675 - val_loss: 0.0078 - val_mae: 0.0633 - learning_rate: 0.0010
Epoch 17/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 20ms/step - loss: 0.0083 - mae: 0.0668 - val_loss: 0.0076 - val_mae: 0.0613 - learning_rate: 0.0010
Epoch 18/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 20ms/step - loss: 0.0079 - mae: 0.0650 - val_loss: 0.0079 - val_mae: 0.0655 - learning_rate: 0.0010
Epoch 19/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0083 - mae: 0.0663 - val_loss: 0.0076 - val_mae: 0.0617 - learning_rate: 0.0010
Epoch 20/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0077 - mae: 0.0641 - val_loss: 0.0086 - val_mae: 0.0643 - learning_rate: 0.0010
Epoch 21/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0081 - mae: 0.0652 - val_loss: 0.0079 - val_mae: 0.0640 - learning_rate: 0.0010
Epoch 22/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0077 - mae: 0.0636 - val_loss: 0.0078 - val_mae: 0.0638 - learning_rate: 0.0010
Epoch 23/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0081 - mae: 0.0655 - val_loss: 0.0075 - val_mae: 0.0607 - learning_rate: 0.0010
Epoch 24/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0076 - mae: 0.0625 - val_loss: 0.0073 - val_mae: 0.0589 - learning_rate: 0.0010
Epoch 25/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0074 - mae: 0.0615 - val_loss: 0.0075 - val_mae: 0.0597 - learning_rate: 0.0010
Epoch 26/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0082 - mae: 0.0646 - val_loss: 0.0075 - val_mae: 0.0610 - learning_rate: 0.0010
Epoch 27/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0078 - mae: 0.0643 - val_loss: 0.0079 - val_mae: 0.0633 - learning_rate: 0.0010
Epoch 28/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0077 - mae: 0.0639 - val_loss: 0.0078 - val_mae: 0.0640 - learning_rate: 0.0010
Epoch 29/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0078 - mae: 0.0641 - val_loss: 0.0074 - val_mae: 0.0590 - learning_rate: 0.0010
Epoch 30/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0076 - mae: 0.0631 - val_loss: 0.0074 - val_mae: 0.0625 - learning_rate: 0.0010
Epoch 31/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0075 - mae: 0.0631 - val_loss: 0.0081 - val_mae: 0.0678 - learning_rate: 0.0010
Epoch 32/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0075 - mae: 0.0641 - val_loss: 0.0077 - val_mae: 0.0606 - learning_rate: 0.0010
Epoch 33/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0077 - mae: 0.0636 - val_loss: 0.0073 - val_mae: 0.0591 - learning_rate: 0.0010
Epoch 34/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0071 - mae: 0.0614 - val_loss: 0.0075 - val_mae: 0.0590 - learning_rate: 0.0010
Epoch 35/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0069 - mae: 0.0595 - val_loss: 0.0074 - val_mae: 0.0633 - learning_rate: 0.0010
Epoch 36/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0069 - mae: 0.0612 - val_loss: 0.0070 - val_mae: 0.0576 - learning_rate: 0.0010
Epoch 37/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0071 - mae: 0.0611 - val_loss: 0.0073 - val_mae: 0.0608 - learning_rate: 0.0010
Epoch 38/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0068 - mae: 0.0601 - val_loss: 0.0076 - val_mae: 0.0633 - learning_rate: 0.0010
Epoch 39/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0069 - mae: 0.0614 - val_loss: 0.0072 - val_mae: 0.0618 - learning_rate: 0.0010
Epoch 40/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0074 - mae: 0.0618 - val_loss: 0.0068 - val_mae: 0.0587 - learning_rate: 0.0010
Epoch 41/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0069 - mae: 0.0602 - val_loss: 0.0071 - val_mae: 0.0623 - learning_rate: 0.0010
Epoch 42/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0071 - mae: 0.0603 - val_loss: 0.0069 - val_mae: 0.0579 - learning_rate: 0.0010
Epoch 43/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0065 - mae: 0.0579 - val_loss: 0.0067 - val_mae: 0.0583 - learning_rate: 0.0010
Epoch 44/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0064 - mae: 0.0567 - val_loss: 0.0067 - val_mae: 0.0552 - learning_rate: 0.0010
Epoch 45/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0069 - mae: 0.0593 - val_loss: 0.0065 - val_mae: 0.0557 - learning_rate: 0.0010
Epoch 46/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0065 - mae: 0.0576 - val_loss: 0.0067 - val_mae: 0.0585 - learning_rate: 0.0010
Epoch 47/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0066 - mae: 0.0574 - val_loss: 0.0060 - val_mae: 0.0533 - learning_rate: 0.0010
Epoch 48/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0063 - mae: 0.0567 - val_loss: 0.0061 - val_mae: 0.0561 - learning_rate: 0.0010
Epoch 49/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 18ms/step - loss: 0.0059 - mae: 0.0555 - val_loss: 0.0061 - val_mae: 0.0564 - learning_rate: 0.0010
Epoch 50/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 20ms/step - loss: 0.0061 - mae: 0.0552 - val_loss: 0.0064 - val_mae: 0.0549 - learning_rate: 0.0010
Epoch 51/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0059 - mae: 0.0544 - val_loss: 0.0062 - val_mae: 0.0557 - learning_rate: 0.0010
Epoch 52/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0057 - mae: 0.0547 - val_loss: 0.0058 - val_mae: 0.0533 - learning_rate: 0.0010
Epoch 53/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0059 - mae: 0.0546 - val_loss: 0.0060 - val_mae: 0.0525 - learning_rate: 0.0010
Epoch 54/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0057 - mae: 0.0535 - val_loss: 0.0058 - val_mae: 0.0503 - learning_rate: 0.0010
Epoch 55/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0061 - mae: 0.0546 - val_loss: 0.0053 - val_mae: 0.0492 - learning_rate: 0.0010
Epoch 56/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0058 - mae: 0.0527 - val_loss: 0.0055 - val_mae: 0.0505 - learning_rate: 0.0010
Epoch 57/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0054 - mae: 0.0523 - val_loss: 0.0058 - val_mae: 0.0524 - learning_rate: 0.0010
Epoch 58/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0050 - mae: 0.0508 - val_loss: 0.0067 - val_mae: 0.0558 - learning_rate: 0.0010
Epoch 59/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0059 - mae: 0.0541 - val_loss: 0.0059 - val_mae: 0.0547 - learning_rate: 0.0010
Epoch 60/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0053 - mae: 0.0517 - val_loss: 0.0055 - val_mae: 0.0521 - learning_rate: 0.0010
Epoch 61/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0056 - mae: 0.0527 - val_loss: 0.0048 - val_mae: 0.0458 - learning_rate: 0.0010
Epoch 62/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0050 - mae: 0.0501 - val_loss: 0.0052 - val_mae: 0.0495 - learning_rate: 0.0010
Epoch 63/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0051 - mae: 0.0505 - val_loss: 0.0054 - val_mae: 0.0493 - learning_rate: 0.0010
Epoch 64/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0052 - mae: 0.0506 - val_loss: 0.0056 - val_mae: 0.0513 - learning_rate: 0.0010
Epoch 65/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0052 - mae: 0.0513 - val_loss: 0.0049 - val_mae: 0.0463 - learning_rate: 0.0010
Epoch 66/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0047 - mae: 0.0486 - val_loss: 0.0053 - val_mae: 0.0505 - learning_rate: 0.0010
Epoch 67/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0047 - mae: 0.0489 - val_loss: 0.0053 - val_mae: 0.0484 - learning_rate: 0.0010
Epoch 68/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0045 - mae: 0.0475 - val_loss: 0.0049 - val_mae: 0.0502 - learning_rate: 0.0010
Epoch 69/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0048 - mae: 0.0485 - val_loss: 0.0048 - val_mae: 0.0475 - learning_rate: 0.0010
Epoch 70/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0048 - mae: 0.0486 - val_loss: 0.0046 - val_mae: 0.0456 - learning_rate: 0.0010
Epoch 71/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0046 - mae: 0.0480 - val_loss: 0.0045 - val_mae: 0.0468 - learning_rate: 0.0010
Epoch 72/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0041 - mae: 0.0444 - val_loss: 0.0045 - val_mae: 0.0465 - learning_rate: 0.0010
Epoch 73/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0044 - mae: 0.0458 - val_loss: 0.0042 - val_mae: 0.0447 - learning_rate: 0.0010
Epoch 74/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0045 - mae: 0.0465 - val_loss: 0.0057 - val_mae: 0.0518 - learning_rate: 0.0010
Epoch 75/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0042 - mae: 0.0458 - val_loss: 0.0051 - val_mae: 0.0478 - learning_rate: 0.0010
Epoch 76/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0047 - mae: 0.0488 - val_loss: 0.0046 - val_mae: 0.0454 - learning_rate: 0.0010
Epoch 77/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0048 - mae: 0.0470 - val_loss: 0.0043 - val_mae: 0.0469 - learning_rate: 0.0010
Epoch 78/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0040 - mae: 0.0452 - val_loss: 0.0042 - val_mae: 0.0431 - learning_rate: 0.0010
Epoch 79/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0047 - mae: 0.0484 - val_loss: 0.0039 - val_mae: 0.0414 - learning_rate: 0.0010
Epoch 80/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0044 - mae: 0.0468 - val_loss: 0.0049 - val_mae: 0.0470 - learning_rate: 0.0010
Epoch 81/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0042 - mae: 0.0455 - val_loss: 0.0042 - val_mae: 0.0449 - learning_rate: 0.0010
Epoch 82/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0042 - mae: 0.0456 - val_loss: 0.0037 - val_mae: 0.0423 - learning_rate: 0.0010
Epoch 83/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0039 - mae: 0.0442 - val_loss: 0.0044 - val_mae: 0.0458 - learning_rate: 0.0010
Epoch 84/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0041 - mae: 0.0445 - val_loss: 0.0040 - val_mae: 0.0427 - learning_rate: 0.0010
Epoch 85/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0038 - mae: 0.0433 - val_loss: 0.0043 - val_mae: 0.0434 - learning_rate: 0.0010
Epoch 86/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0041 - mae: 0.0446 - val_loss: 0.0044 - val_mae: 0.0463 - learning_rate: 0.0010
Epoch 87/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0039 - mae: 0.0437 - val_loss: 0.0040 - val_mae: 0.0423 - learning_rate: 0.0010
Epoch 88/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0037 - mae: 0.0426 - val_loss: 0.0039 - val_mae: 0.0430 - learning_rate: 0.0010
Epoch 89/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0037 - mae: 0.0429 - val_loss: 0.0041 - val_mae: 0.0428 - learning_rate: 0.0010
Epoch 90/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0035 - mae: 0.0425 - val_loss: 0.0037 - val_mae: 0.0418 - learning_rate: 0.0010
Epoch 91/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0034 - mae: 0.0417 - val_loss: 0.0037 - val_mae: 0.0407 - learning_rate: 0.0010
Epoch 92/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0035 - mae: 0.0410 - val_loss: 0.0037 - val_mae: 0.0404 - learning_rate: 0.0010
Epoch 93/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0033 - mae: 0.0403 - val_loss: 0.0037 - val_mae: 0.0413 - learning_rate: 0.0010
Epoch 94/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0035 - mae: 0.0413 - val_loss: 0.0037 - val_mae: 0.0396 - learning_rate: 0.0010
Epoch 95/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0034 - mae: 0.0405 - val_loss: 0.0034 - val_mae: 0.0401 - learning_rate: 0.0010
Epoch 96/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0035 - mae: 0.0411 - val_loss: 0.0031 - val_mae: 0.0387 - learning_rate: 0.0010
Epoch 97/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0031 - mae: 0.0389 - val_loss: 0.0037 - val_mae: 0.0394 - learning_rate: 0.0010
Epoch 98/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0034 - mae: 0.0406 - val_loss: 0.0031 - val_mae: 0.0378 - learning_rate: 0.0010
Epoch 99/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0031 - mae: 0.0399 - val_loss: 0.0034 - val_mae: 0.0390 - learning_rate: 0.0010
Epoch 100/100
122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0030 - mae: 0.0384 - val_loss: 0.0031 - val_mae: 0.0378 - learning_rate: 0.0010
52/52 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step
In [21]:
residuals = y_test - y_pred.flatten()
# Plot histogram of residuals
plt.figure(figsize=(10, 6))
plt.hist(residuals, bins=30, color='purple', alpha=0.7)
plt.title('Histogram of Residuals')
plt.xlabel('Residual')
plt.ylabel('Frequency')
plt.show()
In [22]:
r2 = r2_score(y_test, y_pred)
print(f'R-squared: {r2}')
R-squared: 0.9043033169059241
[LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM]
In [ ]: